wordlist 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,34 +1,39 @@
1
1
  module Wordlist
2
2
  module Parsers
3
- def self.included(base)
4
- base.module_eval do
5
- # Ignore case of parsed text
6
- attr_accessor :ignore_case
3
+ # Ignore case of parsed text
4
+ attr_accessor :ignore_case
7
5
 
8
- # Ignore the punctuation of parsed text
9
- attr_accessor :ignore_punctuation
6
+ # Ignore the punctuation of parsed text
7
+ attr_accessor :ignore_punctuation
10
8
 
11
- # Ignore URLs
12
- attr_accessor :ignore_urls
9
+ # Ignore URLs
10
+ attr_accessor :ignore_urls
13
11
 
14
- # Ignore Phone numbers
15
- attr_accessor :ignore_phone_numbers
12
+ # Ignore Phone numbers
13
+ attr_accessor :ignore_phone_numbers
16
14
 
17
- # Ignore References
18
- attr_accessor :ignore_references
19
- end
20
- end
15
+ # Ignore References
16
+ attr_accessor :ignore_references
21
17
 
18
+ #
19
+ # Initializes the parser's settings.
20
+ #
22
21
  def initialize
23
- @ignore_case = false
24
- @ignore_punctuation = true
25
- @ignore_urls = true
22
+ @ignore_case = false
23
+ @ignore_punctuation = true
24
+ @ignore_urls = true
26
25
  @ignore_phone_numbers = false
27
- @ignore_references = false
26
+ @ignore_references = false
28
27
  end
29
28
 
30
29
  #
31
- # Parses the specified _text_ and returns an Array of tokens.
30
+ # Parses the given text.
31
+ #
32
+ # @param [String] text
33
+ # The text to parse.
34
+ #
35
+ # @return [Array<String>]
36
+ # The Array of parsed tokens.
32
37
  #
33
38
  def parse(text)
34
39
  text = text.to_s
@@ -0,0 +1,2 @@
1
+ require 'wordlist/runners/runner'
2
+ require 'wordlist/runners/list'
@@ -0,0 +1,116 @@
1
+ require 'wordlist/runners/runner'
2
+ require 'wordlist/flat_file'
3
+
4
+ module Wordlist
5
+ module Runners
6
+ class List < Runner
7
+
8
+ #
9
+ # Creates a new List Runner.
10
+ #
11
+ def initialize
12
+ @file = nil # path of the wordlist to list; set by -f/--file
13
+ @min_length = nil # value of --min-length, passed on to FlatFile
14
+ @max_length = nil # value of --max-length, passed on to FlatFile
15
+ @mutations = [] # [pattern, replacement] pairs collected from -m/-M
16
+
17
+ @words = false # set by -w/--words
18
+ @unique_words = false # set by -u/--unique
19
+
20
+ @output = nil # expanded path from -o/--output; nil means print via Kernel.puts
21
+ end
22
+
23
+ #
24
+ # Runs the list runner.
25
+ #
26
+ # @param [Array<String>] args
27
+ # Arguments to run the runner with.
28
+ #
29
+ def run(*args)
30
+ super(*args) # Runner#run calls optparse, which fills in the option instance variables
31
+
32
+ list = if @file
33
+ FlatFile.new(
34
+ @file,
35
+ :min_length => @min_length,
36
+ :max_length => @max_length
37
+ )
38
+ else
39
+ print_error('the --file option must be specified')
40
+ exit -1
41
+ end
42
+
43
+ @mutations.each do |pattern,substitute| # register every -m/-M rule on the list
44
+ list.mutate(pattern,substitute)
45
+ end
46
+
47
+ words = lambda { |output| # writes the list to any object that responds to #puts
48
+ puts = output.method(:puts)
49
+
50
+ if @unique_words # --unique takes precedence over --words
51
+ list.each_unique(&puts)
52
+ elsif @words
53
+ list.each_word(&puts)
54
+ else
55
+ list.each(&puts)
56
+ end
57
+ }
58
+
59
+ if @output
60
+ File.open(@output,'w+',&words) # the opened File is handed to the lambda as output
61
+ else
62
+ words.call(Kernel) # no --output given: print via Kernel.puts
63
+ end
64
+ end
65
+
66
+ protected
67
+
68
+ #
69
+ # Parses the given arguments.
70
+ #
71
+ # @param [Array<String>] args
72
+ # Arguments to parse.
73
+ #
74
+ def optparse(*args)
75
+ super(*args) do |opts|
76
+ opts.banner = 'usage: wordlist [options]'
77
+
78
+ opts.on('-f','--file FILE','The wordlist file to list') do |file|
79
+ @file = file
80
+ end
81
+
82
+ opts.on('--min-length NUM','Minimum length of words in characters') do |min|
83
+ @min_length = min
84
+ end
85
+
86
+ opts.on('--max-length NUM','Maximum length of words in characters') do |max|
87
+ @max_length = max
88
+ end
89
+
90
+ opts.on('-m','--mutate SUBSTRING::REPLACE','Adds a mutation rule') do |substring_and_replace|
91
+ @mutations << substring_and_replace.split('::',2)
92
+ end
93
+
94
+ opts.on('-M','--mutate-pattern PATTERN::REPLACE','Adds a mutation rule') do |pattern_and_replace|
95
+ pattern, replace = substring_and_replace.split('::',2)
96
+
97
+ @mutations << [Regexp.new(pattern), replace]
98
+ end
99
+
100
+ opts.on('-w','--words','Only print the words in the wordlist') do
101
+ @words = true
102
+ end
103
+
104
+ opts.on('-u','--unique','Only print the unique words in the wordlist') do
105
+ @unique_words = true
106
+ end
107
+
108
+ opts.on('-o','--output FILE','Optional file to output the wordlist to') do |file|
109
+ @output = File.expand_path(file)
110
+ end
111
+ end
112
+ end
113
+
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,67 @@
1
+ require 'optparse'
2
+
3
+ module Wordlist
4
+ module Runners
5
+ class Runner
6
+ #
7
+ # Creates and runs the runner with the given arguments.
8
+ #
9
+ # @param [Array<String>] args
10
+ # Arguments to parse.
11
+ #
12
+ def self.run(*args)
13
+ runner = new() # instantiates whichever runner class this was called on
14
+ runner.run(*args)
15
+ end
16
+
17
+ #
18
+ # Runs the runner with the given arguments.
19
+ #
20
+ # @param [Array<String>] args
21
+ # Arguments to run the runner with.
22
+ #
23
+ def run(*args)
24
+ optparse(*args) # the base runner only parses its arguments
25
+ end
26
+
27
+ protected
28
+
29
+ #
30
+ # Prints the given error message.
31
+ #
32
+ # @param [String] message
33
+ # The error message to print.
34
+ #
35
+ def print_error(message)
36
+ $stderr.puts "#{$0}: #{message}" # prefix the message with the program name ($0)
37
+ end
38
+
39
+ #
40
+ # Parses the given arguments.
41
+ #
42
+ # @param [Array<String>] args
43
+ # Arguments to parse.
44
+ #
45
+ # @yield [opts]
46
+ # If a block is given, it will be passed the option parser to be
47
+ # configured.
48
+ #
49
+ # @yieldparam [OptionParser] opts
50
+ # The option parser to be configured.
51
+ #
52
+ def optparse(*args)
53
+ opts = OptionParser.new()
54
+
55
+ yield opts if block_given?
56
+
57
+ begin
58
+ opts.parse!(args)
59
+ rescue OptionParser::InvalidOption => e
60
+ $stderr.puts e.message
61
+ $stderr.puts opts
62
+ exit -1
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -14,8 +14,13 @@ module Wordlist
14
14
  end
15
15
 
16
16
  #
17
- # Returns +true+ if the _word_ has been previously seen, returns
18
- # +false+ otherwise.
17
+ # Determines if the given word has been previously seen.
18
+ #
19
+ # @param [String] word
20
+ # The word to check for.
21
+ #
22
+ # @return [Boolean]
23
+ # Specifies whether the word has been previously seen.
19
24
  #
20
25
  def seen?(word)
21
26
  length = word.length
@@ -24,12 +29,18 @@ module Wordlist
24
29
  end
25
30
 
26
31
  #
27
- # Marks the specified _word_ as seen and returns +true+. If the _word_
28
- # has been previously been seen, +false+ will be returned.
32
+ # Marks the given word as previously seen.
33
+ #
34
+ # @param [String] word
35
+ # The word to mark as previously seen.
36
+ #
37
+ # @return [Boolean]
38
+ # `true` if the word had not been seen before this call,
39
+ # `false` if it was already marked as seen.
29
40
  #
30
41
  def saw!(word)
31
42
  length = word.length
32
- crc = crc32(word)
43
+ crc = crc32(word)
33
44
 
34
45
  if @seen.has_key?(length)
35
46
  return false if @seen[length].include?(crc)
@@ -42,8 +53,19 @@ module Wordlist
42
53
  end
43
54
 
44
55
  #
45
- # Passes the specified _word_ through the unique filter, if the
46
- # _word_ has not yet been seen, it will be passed to the given _block_.
56
+ # Passes the given word through the unique filter.
57
+ #
58
+ # @param [String] word
59
+ # The word to pass through the unique filter.
60
+ #
61
+ # @yield [word]
62
+ # The given block will be passed the word, if the word has not been
63
+ # previously seen by the filter.
64
+ #
65
+ # @yieldparam [String] word
66
+ # A unique word that has not been previously seen by the filter.
67
+ #
68
+ # @return [nil]
47
69
  #
48
70
  def pass(word)
49
71
  if saw!(word)
@@ -53,10 +75,27 @@ module Wordlist
53
75
  return nil
54
76
  end
55
77
 
78
+ #
79
+ # Clears the unique filter.
80
+ #
81
+ # @return [UniqueFilter]
82
+ # The cleared filter.
83
+ #
84
+ def clear
85
+ @seen.clear # drop every recorded checksum so all words count as unseen again
86
+ return self # returning the filter allows call chaining
87
+ end
88
+
56
89
  protected
57
90
 
58
91
  #
59
- # Returns the CRC32 checksum of the specified _word_.
92
+ # Returns the CRC32 checksum of the given word.
93
+ #
94
+ # @param [String] word
95
+ # The word to calculate a CRC32 checksum for.
96
+ #
97
+ # @return [Integer]
98
+ # The CRC32 checksum for the given word.
60
99
  #
61
100
  def crc32(word)
62
101
  r = 0xffffffff
@@ -1,4 +1,4 @@
1
1
  module Wordlist
2
2
  # Word version
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')))
3
3
 
4
- require 'wordlist/builder'
4
+ require 'wordlist'
5
5
  require 'benchmark'
6
6
  require 'fileutils'
7
7
 
@@ -10,9 +10,50 @@ path = File.expand_path(File.join(File.dirname(__FILE__),'shakespeare_wordlist.t
10
10
  FileUtils.rm_f(path)
11
11
 
12
12
  Benchmark.bm do |bm|
13
- bm.report('build:') do
13
+ bm.report('build') do
14
14
  Wordlist::Builder.build(path) do |wordlist|
15
15
  wordlist.parse_file('/home/hal/shaks12.txt')
16
16
  end
17
17
  end
18
+
19
+ bm.report('each_unique') do
20
+ Wordlist::FlatFile.new(path) do |wordlist|
21
+ wordlist.each_unique { |word| word }
22
+ end
23
+ end
24
+
25
+ bm.report('each_mutation (1)') do
26
+ Wordlist::FlatFile.new(path) do |wordlist|
27
+ wordlist.mutate /o/i, '0'
28
+
29
+ wordlist.each_mutation { |word| word }
30
+ end
31
+ end
32
+
33
+ bm.report('each_mutation (2)') do
34
+ Wordlist::FlatFile.new(path) do |wordlist|
35
+ wordlist.mutate /o/i, '0'
36
+ wordlist.mutate /a/i, '@'
37
+
38
+ wordlist.each_mutation { |word| word }
39
+ end
40
+ end
41
+
42
+ bm.report('each_mutation (3)') do
43
+ Wordlist::FlatFile.new(path) do |wordlist|
44
+ wordlist.mutate /o/i, '0'
45
+ wordlist.mutate /a/i, '@'
46
+ wordlist.mutate /e/i, '3'
47
+
48
+ wordlist.each_mutation { |word| word }
49
+ end
50
+ end
51
+ end
52
+
53
+ Benchmark.bm do |bm|
54
+ mutator = Wordlist::Mutator.new(/o/i, '0')
55
+
56
+ bm.report('Mutator#each') do
57
+ mutator.each('lololololoLOLOLOLOLO') { |word| }
58
+ end
18
59
  end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+ require 'helpers/text'
3
+ require 'helpers/wordlist'
4
+
5
+ shared_examples_for "a wordlist Builder" do # NOTE(review): @path and @expected are not assigned here; presumably set by the including spec, confirm
6
+ include Helpers
7
+
8
+ before(:all) do
9
+ @words = ['dog', 'cat', 'catx', 'dat', 'dog', 'cat'] # contains duplicates on purpose
10
+ @sentence = 'dog cat catx, dog dat.'
11
+ @text = 'dog cat: catx. dog cat dat dog.'
12
+ @file = Helpers::SAMPLE_TEXT
13
+ end
14
+
15
+ it "should build a unique wordlist from words" do
16
+ Builder.build(@path) do |wordlist|
17
+ wordlist += @words
18
+ end
19
+
20
+ should_contain_words(@path,@expected)
21
+ end
22
+
23
+ it "should build a unique wordlist from a sentence" do
24
+ Builder.build(@path) do |wordlist|
25
+ wordlist.parse(@sentence)
26
+ end
27
+
28
+ should_contain_words(@path,@expected)
29
+ end
30
+
31
+ it "should build a unique wordlist from text" do
32
+ Builder.build(@path) do |wordlist|
33
+ wordlist.parse(@text)
34
+ end
35
+
36
+ should_contain_words(@path,@expected)
37
+ end
38
+
39
+ it "should build a unique wordlist from a file" do
40
+ Builder.build(@path) do |wordlist|
41
+ wordlist.parse_file(@file)
42
+ end
43
+
44
+ should_contain_words(@path,@expected)
45
+ end
46
+ end