wordlist 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,34 +1,39 @@
1
1
  module Wordlist
2
2
  module Parsers
3
- def self.included(base)
4
- base.module_eval do
5
- # Ignore case of parsed text
6
- attr_accessor :ignore_case
3
+ # Ignore case of parsed text
4
+ attr_accessor :ignore_case
7
5
 
8
- # Ignore the punctuation of parsed text
9
- attr_accessor :ignore_punctuation
6
+ # Ignore the punctuation of parsed text
7
+ attr_accessor :ignore_punctuation
10
8
 
11
- # Ignore URLs
12
- attr_accessor :ignore_urls
9
+ # Ignore URLs
10
+ attr_accessor :ignore_urls
13
11
 
14
- # Ignore Phone numbers
15
- attr_accessor :ignore_phone_numbers
12
+ # Ignore Phone numbers
13
+ attr_accessor :ignore_phone_numbers
16
14
 
17
- # Ignore References
18
- attr_accessor :ignore_references
19
- end
20
- end
15
+ # Ignore References
16
+ attr_accessor :ignore_references
21
17
 
18
+ #
19
+ # Initializes the parsers settings.
20
+ #
22
21
  def initialize
23
- @ignore_case = false
24
- @ignore_punctuation = true
25
- @ignore_urls = true
22
+ @ignore_case = false
23
+ @ignore_punctuation = true
24
+ @ignore_urls = true
26
25
  @ignore_phone_numbers = false
27
- @ignore_references = false
26
+ @ignore_references = false
28
27
  end
29
28
 
30
29
  #
31
- # Parses the specified _text_ and returns an Array of tokens.
30
+ # Parses the given text.
31
+ #
32
+ # @param [String] text
33
+ # The text to parse.
34
+ #
35
+ # @return [Array<String>]
36
+ # The Array of parsed tokens.
32
37
  #
33
38
  def parse(text)
34
39
  text = text.to_s
@@ -0,0 +1,2 @@
1
+ require 'wordlist/runners/runner'
2
+ require 'wordlist/runners/list'
@@ -0,0 +1,116 @@
require 'wordlist/runners/runner'
require 'wordlist/flat_file'

module Wordlist
  module Runners
    #
    # Command-line runner that prints the contents of a wordlist file,
    # optionally restricted by word length and expanded by mutation rules.
    #
    class List < Runner

      #
      # Creates a new List Runner with every option unset.
      #
      def initialize
        @file       = nil
        @min_length = nil
        @max_length = nil
        @mutations  = []

        @words        = false
        @unique_words = false

        @output = nil
      end

      #
      # Runs the list runner.
      #
      # Parses the arguments, opens the wordlist named by `--file`, registers
      # the requested mutation rules and prints the resulting words either to
      # the `--output` file or to standard output. Prints an error and exits
      # with status -1 when `--file` was not given.
      #
      # @param [Array<String>] args
      #   Arguments to run the runner with.
      #
      def run(*args)
        super(*args)

        unless @file
          print_error('the --file option must be specified')
          exit(-1)
        end

        list = FlatFile.new(
          @file,
          :min_length => @min_length,
          :max_length => @max_length
        )

        @mutations.each do |pattern, substitute|
          list.mutate(pattern, substitute)
        end

        if @output
          # The block form closes the output file once printing is done.
          File.open(@output, 'w+') { |file| print_words(list, file) }
        else
          print_words(list, Kernel)
        end
      end

      protected

      #
      # Prints the words of the list, one per line.
      #
      # `--unique` takes precedence over `--words`; with neither option the
      # list is enumerated with its plain `each`.
      #
      # @param [#each, #each_word, #each_unique] list
      #   The wordlist to enumerate.
      #
      # @param [#puts] output
      #   The object that receives each word through `puts`.
      #
      def print_words(list, output)
        printer = output.method(:puts)

        if @unique_words
          list.each_unique(&printer)
        elsif @words
          list.each_word(&printer)
        else
          list.each(&printer)
        end
      end

      #
      # Parses the given arguments.
      #
      # @param [Array<String>] args
      #   Arguments to parse.
      #
      def optparse(*args)
        super(*args) do |opts|
          opts.banner = 'usage: wordlist [options]'

          opts.on('-f', '--file FILE', 'The wordlist file to list') do |file|
            @file = file
          end

          # NOTE(review): lengths are coerced to Integer on the assumption that
          # FlatFile compares them against word lengths; confirm against
          # FlatFile.
          opts.on('--min-length NUM', Integer, 'Minimum length of words in characters') do |min|
            @min_length = min
          end

          opts.on('--max-length NUM', Integer, 'Maximum length of words in characters') do |max|
            @max_length = max
          end

          opts.on('-m', '--mutate SUBSTRING::REPLACE', 'Adds a mutation rule') do |substring_and_replace|
            @mutations << substring_and_replace.split('::', 2)
          end

          opts.on('-M', '--mutate-pattern PATTERN::REPLACE', 'Adds a mutation rule') do |pattern_and_replace|
            # Split the block's own argument; PATTERN is compiled as a Regexp.
            pattern, replace = pattern_and_replace.split('::', 2)

            @mutations << [Regexp.new(pattern), replace]
          end

          opts.on('-w', '--words', 'Only print the words in the wordlist') do
            @words = true
          end

          opts.on('-u', '--unique', 'Only print the unique words in the wordlist') do
            @unique_words = true
          end

          opts.on('-o', '--output FILE', 'Optional file to output the wordlist to') do |file|
            @output = File.expand_path(file)
          end
        end
      end

    end
  end
end
@@ -0,0 +1,67 @@
require 'optparse'

module Wordlist
  module Runners
    #
    # Base class for command-line runners: builds an OptionParser, lets
    # subclasses configure it, and parses the given arguments.
    #
    class Runner
      #
      # Creates and runs the runner with the given arguments.
      #
      # @param [Array<String>] args
      #   Arguments to parse.
      #
      def self.run(*args)
        new.run(*args)
      end

      #
      # Runs the runner with the given arguments.
      #
      # @param [Array<String>] args
      #   Arguments to run the runner with.
      #
      def run(*args)
        optparse(*args)
      end

      protected

      #
      # Prints the given error message to standard error, prefixed with the
      # program name.
      #
      # @param [String] message
      #   The error message to print.
      #
      def print_error(message)
        $stderr.puts "#{$0}: #{message}"
      end

      #
      # Parses the given arguments.
      #
      # Any parse failure (unknown option, missing or invalid argument, ...)
      # is reported on standard error together with the usage text, and the
      # process exits with status -1.
      #
      # @param [Array<String>] args
      #   Arguments to parse.
      #
      # @yield [opts]
      #   If a block is given, it will be passed the option parser to be
      #   configured.
      #
      # @yieldparam [OptionParser] opts
      #   The option parser to be configured.
      #
      # @return [Array<String>]
      #   The arguments left over after option parsing.
      #
      def optparse(*args)
        opts = OptionParser.new

        yield opts if block_given?

        begin
          opts.parse!(args)
        rescue OptionParser::ParseError => e
          # ParseError is the common ancestor of InvalidOption,
          # MissingArgument, InvalidArgument, etc.
          $stderr.puts e.message
          $stderr.puts opts
          exit(-1)
        end
      end
    end
  end
end
@@ -14,8 +14,13 @@ module Wordlist
14
14
  end
15
15
 
16
16
  #
17
- # Returns +true+ if the _word_ has been previously seen, returns
18
- # +false+ otherwise.
17
+ # Determines if the given word has been previously seen.
18
+ #
19
+ # @param [String] word
20
+ # The word to check for.
21
+ #
22
+ # @return [Boolean]
23
+ # Specifies whether the word has been previously seen.
19
24
  #
20
25
  def seen?(word)
21
26
  length = word.length
@@ -24,12 +29,18 @@ module Wordlist
24
29
  end
25
30
 
26
31
  #
27
- # Marks the specified _word_ as seen and returns +true+. If the _word_
28
- # has been previously been seen, +false+ will be returned.
32
+ # Marks the given word as previously seen.
33
+ #
34
+ # @param [String] word
35
+ # The word to mark as previously seen.
36
+ #
37
+ # @return [Boolean]
38
+ # Specifies whether the word was new, i.e. had not been seen
39
+ # before this call.
29
40
  #
30
41
  def saw!(word)
31
42
  length = word.length
32
- crc = crc32(word)
43
+ crc = crc32(word)
33
44
 
34
45
  if @seen.has_key?(length)
35
46
  return false if @seen[length].include?(crc)
@@ -42,8 +53,19 @@ module Wordlist
42
53
  end
43
54
 
44
55
  #
45
- # Passes the specified _word_ through the unique filter, if the
46
- # _word_ has not yet been seen, it will be passed to the given _block_.
56
+ # Passes the given word through the unique filter.
57
+ #
58
+ # @param [String] word
59
+ # The word to pass through the unique filter.
60
+ #
61
+ # @yield [word]
62
+ # The given block will be passed the word, if the word has not been
63
+ # previously seen by the filter.
64
+ #
65
+ # @yieldparam [String] word
66
+ # A unique word that has not been previously seen by the filter.
67
+ #
68
+ # @return [nil]
47
69
  #
48
70
  def pass(word)
49
71
  if saw!(word)
@@ -53,10 +75,27 @@ module Wordlist
53
75
  return nil
54
76
  end
55
77
 
78
+ #
79
+ # Clears the unique filter.
80
+ #
81
+ # @return [UniqueFilter]
82
+ # The cleared filter.
83
+ #
84
+ def clear
85
+ @seen.clear
86
+ return self
87
+ end
88
+
56
89
  protected
57
90
 
58
91
  #
59
- # Returns the CRC32 checksum of the specified _word_.
92
+ # Returns the CRC32 checksum of the given word.
93
+ #
94
+ # @param [String] word
95
+ # The word to calculate a CRC32 checksum for.
96
+ #
97
+ # @return [Integer]
98
+ # The CRC32 checksum for the given word.
60
99
  #
61
100
  def crc32(word)
62
101
  r = 0xffffffff
@@ -1,4 +1,4 @@
1
1
  module Wordlist
2
2
  # Wordlist version
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')))
3
3
 
4
- require 'wordlist/builder'
4
+ require 'wordlist'
5
5
  require 'benchmark'
6
6
  require 'fileutils'
7
7
 
@@ -10,9 +10,50 @@ path = File.expand_path(File.join(File.dirname(__FILE__),'shakespeare_wordlist.t
10
10
  FileUtils.rm_f(path)
11
11
 
12
12
  Benchmark.bm do |bm|
13
- bm.report('build:') do
13
+ bm.report('build') do
14
14
  Wordlist::Builder.build(path) do |wordlist|
15
15
  wordlist.parse_file('/home/hal/shaks12.txt')
16
16
  end
17
17
  end
18
+
19
+ bm.report('each_unique') do
20
+ Wordlist::FlatFile.new(path) do |wordlist|
21
+ wordlist.each_unique { |word| word }
22
+ end
23
+ end
24
+
25
+ bm.report('each_mutation (1)') do
26
+ Wordlist::FlatFile.new(path) do |wordlist|
27
+ wordlist.mutate /o/i, '0'
28
+
29
+ wordlist.each_mutation { |word| word }
30
+ end
31
+ end
32
+
33
+ bm.report('each_mutation (2)') do
34
+ Wordlist::FlatFile.new(path) do |wordlist|
35
+ wordlist.mutate /o/i, '0'
36
+ wordlist.mutate /a/i, '@'
37
+
38
+ wordlist.each_mutation { |word| word }
39
+ end
40
+ end
41
+
42
+ bm.report('each_mutation (3)') do
43
+ Wordlist::FlatFile.new(path) do |wordlist|
44
+ wordlist.mutate /o/i, '0'
45
+ wordlist.mutate /a/i, '@'
46
+ wordlist.mutate /e/i, '3'
47
+
48
+ wordlist.each_mutation { |word| word }
49
+ end
50
+ end
51
+ end
52
+
53
+ Benchmark.bm do |bm|
54
+ mutator = Wordlist::Mutator.new(/o/i, '0')
55
+
56
+ bm.report('Mutator#each') do
57
+ mutator.each('lololololoLOLOLOLOLO') { |word| }
58
+ end
18
59
  end
@@ -0,0 +1,46 @@
require 'spec_helper'
require 'helpers/text'
require 'helpers/wordlist'

# Shared examples for wordlist builders: each example builds a wordlist at
# @path from a different kind of input and checks it against @expected.
#
# NOTE(review): @path and @expected are read here but never assigned in this
# file; presumably the including example group sets them. Confirm.
shared_examples_for "a wordlist Builder" do
  include Helpers

  # Fixture inputs; the word, sentence and text samples repeat words on
  # purpose so the examples exercise de-duplication.
  before(:all) do
    @words    = %w[dog cat catx dat dog cat]
    @sentence = 'dog cat catx, dog dat.'
    @text     = 'dog cat: catx. dog cat dat dog.'
    @file     = Helpers::SAMPLE_TEXT
  end

  it "should build a unique wordlist from words" do
    Builder.build(@path) { |wordlist| wordlist += @words }

    should_contain_words(@path, @expected)
  end

  it "should build a unique wordlist from a sentence" do
    Builder.build(@path) { |wordlist| wordlist.parse(@sentence) }

    should_contain_words(@path, @expected)
  end

  it "should build a unique wordlist from text" do
    Builder.build(@path) { |wordlist| wordlist.parse(@text) }

    should_contain_words(@path, @expected)
  end

  it "should build a unique wordlist from a file" do
    Builder.build(@path) { |wordlist| wordlist.parse_file(@file) }

    should_contain_words(@path, @expected)
  end
end