wordlist 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/Manifest.txt +30 -0
- data/README.txt +103 -0
- data/Rakefile +22 -0
- data/lib/wordlist.rb +4 -0
- data/lib/wordlist/builder.rb +128 -0
- data/lib/wordlist/builders.rb +1 -0
- data/lib/wordlist/builders/website.rb +44 -0
- data/lib/wordlist/flat_file.rb +36 -0
- data/lib/wordlist/list.rb +131 -0
- data/lib/wordlist/mutator.rb +84 -0
- data/lib/wordlist/parsers.rb +69 -0
- data/lib/wordlist/unique_filter.rb +73 -0
- data/lib/wordlist/version.rb +4 -0
- data/scripts/benchmark +18 -0
- data/scripts/text/comedy_of_errors.txt +4011 -0
- data/spec/builder_spec.rb +36 -0
- data/spec/classes/parser_class.rb +5 -0
- data/spec/classes/test_list.rb +9 -0
- data/spec/flat_file_spec.rb +25 -0
- data/spec/list_spec.rb +58 -0
- data/spec/mutator_spec.rb +43 -0
- data/spec/parsers_spec.rb +118 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text/flat_file.txt +3 -0
- data/spec/text/previous_wordlist.txt +3 -0
- data/spec/text/sample.txt +3 -0
- data/spec/unique_filter_spec.rb +34 -0
- data/spec/wordlist_spec.rb +9 -0
- data/tasks/spec.rb +9 -0
- metadata +123 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'wordlist/builder'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'helpers/text'
|
5
|
+
require 'builder_examples'
|
6
|
+
|
7
|
+
require 'tempfile'
|
8
|
+
require 'fileutils'
|
9
|
+
|
10
|
+
# Specs for Builder, exercised against both a brand-new wordlist file and a
# file that already contains words.
#
# The shared example group "a wordlist Builder" comes from 'builder_examples'
# (not visible here); it presumably reads @path and @expected -- confirm
# against that file.
describe Builder do
  describe "new wordlist" do
    before(:all) do
      @expected = ['dog', 'cat', 'catx', 'dat']
    end

    before(:each) do
      # Keep a reference to the Tempfile: once the object is garbage
      # collected its finalizer unlinks the file, which could otherwise
      # happen while an example is still using @path.
      @tempfile = Tempfile.new('wordlist')
      @path = @tempfile.path
    end

    after(:each) do
      # Close and remove the temporary file deterministically.
      @tempfile.close!
    end

    it_should_behave_like "a wordlist Builder"
  end

  describe "existing wordlist" do
    before(:all) do
      @expected = ['dog', 'cat', 'log', 'catx', 'dat']
    end

    before(:each) do
      # Same reasoning as above: retaining the Tempfile protects the copied
      # fixture from being unlinked by the finalizer mid-example.
      @tempfile = Tempfile.new('wordlist')
      @path = @tempfile.path

      # Seed the temporary file with the previously generated wordlist.
      FileUtils.cp(PREVIOUS_WORDLIST, @path)
    end

    after(:each) do
      # Close and remove the temporary file deterministically.
      @tempfile.close!
    end

    it_should_behave_like "a wordlist Builder"
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'wordlist/flat_file'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for FlatFile, read from the 'text/flat_file.txt' fixture that sits
# next to this spec file.
describe FlatFile do
  before(:all) do
    spec_dir = File.dirname(__FILE__)

    @path = File.join(spec_dir, 'text', 'flat_file.txt')
    @list = FlatFile.new(@path)
  end

  it "should have a path it reads from" do
    @list.path.should == @path
  end

  it "should read the lines of the flat-file" do
    # Each yielded word must still be outstanding; it is ticked off once
    # seen, so an unknown or repeated word fails the example.
    remaining = %w[one two three]

    @list.each_word do |line|
      remaining.include?(line).should == true
      remaining.delete(line)
    end

    # Nothing may be left over once iteration has finished.
    remaining.should == []
  end
end
|
data/spec/list_spec.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'wordlist/list'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'classes/test_list'
|
5
|
+
|
6
|
+
# Specs for List, driven through the TestList helper class loaded from
# 'classes/test_list' (not visible here).
describe List do
  before(:all) do
    @source = TestList.new

    # Register the four substitution rules, in this fixed order.
    [['o', '0'], ['a', 'A'], ['e', '3'], ['s', '5']].each do |pattern, substitute|
      @source.mutate pattern, substitute
    end
  end

  it "should iterate over each word" do
    seen = []
    @source.each_word { |entry| seen << entry }

    seen.should == ['omg.hackers']
  end

  it "should iterate over each unique word" do
    seen = []
    @source.each_unique { |entry| seen << entry }

    seen.should == ['omg.hackers']
  end

  it "should iterate over every possible mutated word" do
    # Every combination of applying / not applying each of the four rules.
    expected = %w[
      0mg.hAck3r5
      0mg.hAck3rs
      0mg.hAcker5
      0mg.hAckers
      0mg.hack3r5
      0mg.hack3rs
      0mg.hacker5
      0mg.hackers
      omg.hAck3r5
      omg.hAck3rs
      omg.hAcker5
      omg.hAckers
      omg.hack3r5
      omg.hack3rs
      omg.hacker5
      omg.hackers
    ]

    # Tick each mutation off as it is yielded; an unexpected or repeated
    # mutation fails the include? check.
    @source.each_mutation do |variant|
      expected.include?(variant).should == true
      expected.delete(variant)
    end

    # All expected mutations must have been produced.
    expected.should == []
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'wordlist/mutator'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for Mutator: replacing a match with a byte, a String or a block
# result, and enumerating the substitutions of a word.
describe Mutator do
  it "should replace matched text with a byte" do
    # 0x41 is the byte value of 'A'.
    byte_mutator = Mutator.new('o', 0x41)

    byte_mutator.replace('o').should == 'A'
  end

  it "should replace matched text with a String" do
    string_mutator = Mutator.new('o', '0')

    string_mutator.replace('o').should == '0'
  end

  it "should replace matched text using a proc" do
    doubling_mutator = Mutator.new('o') { |match| match * 2 }

    doubling_mutator.replace('o').should == 'oo'
  end

  it "should iterate over every possible substitution" do
    # 'lolol' has two matches, giving four on/off combinations.
    expected = %w[lolol l0lol lol0l l0l0l]
    zero_mutator = Mutator.new(/o/, '0')

    # Tick each result off as it is yielded; an unexpected or repeated
    # result fails the include? check.
    zero_mutator.each('lolol') do |variant|
      expected.include?(variant).should == true
      expected.delete(variant)
    end

    expected.should == []
  end

  it "should iterate over the original word, if no matches were found" do
    yielded = []
    unmatched_mutator = Mutator.new('x', '0')

    unmatched_mutator.each('hello') { |variant| yielded << variant }

    yielded.should == ['hello']
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'classes/parser_class'
|
3
|
+
|
4
|
+
# Specs for the Parsers behaviour, exercised through the ParserClass helper
# loaded from 'classes/parser_class' (not visible here). Each nested group
# builds its own parser and toggles a single option.
describe Parsers do
  describe "default" do
    before(:all) do
      @parser = ParserClass.new
    end

    it "should parse words from a sentence" do
      text = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
      expected = %w[The Deliverator is in touch with the road starts like a bad day stops on a peseta]

      @parser.parse(text).should == expected
    end

    it "should ignore punctuation by default while parsing a sentence" do
      text = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
      expected = %w[
        Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
      ]

      @parser.parse(text).should == expected
    end

    it "should ignore URLs by default while parsing a sentence" do
      text = %{Click on the following link: http://www.example.com/}
      expected = %w[Click on the following link]

      @parser.parse(text).should == expected
    end

    it "should ignore short URIs by default while parsing a sentence" do
      text = %{Click on the following link: jabber://}
      expected = %w[Click on the following link]

      @parser.parse(text).should == expected
    end

    it "should ignore complex HTTP URLs by default while parsing a sentence" do
      text = %{Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N}
      expected = %w[Click on the following link]

      @parser.parse(text).should == expected
    end
  end

  describe "ignoring phone numbers" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_phone_numbers = true
    end

    it "may ignore phone numbers while parsing a sentence" do
      text = %{Call me before 12, 1-888-444-2222.}
      expected = %w[Call me before 12]

      @parser.parse(text).should == expected
    end

    it "may ignore long-distance phone numbers while parsing a sentence" do
      text = %{Call me before 12, 1-444-2222.}
      expected = %w[Call me before 12]

      @parser.parse(text).should == expected
    end

    it "may ignore short phone numbers while parsing a sentence" do
      text = %{Call me before 12, 444-2222.}
      expected = %w[Call me before 12]

      @parser.parse(text).should == expected
    end
  end

  describe "ignoring references" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_references = true
    end

    it "may ignore RFC style references while parsing a sentence" do
      text = %{As one can see, it has failed [1].}
      expected = %w[As one can see it has failed]

      @parser.parse(text).should == expected
    end
  end

  describe "ignoring case" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_case = true
    end

    it "may ignore case while parsing a sentence" do
      text = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
      expected = %w[the deliverator is in touch with the road starts like a bad day stops on a peseta]

      # Repeats the before(:all) setting; kept so the example behaves
      # exactly as before.
      @parser.ignore_case = true
      @parser.parse(text).should == expected
    end
  end

  describe "preserving punctuation" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_punctuation = false
    end

    it "may preserve punctuation while parsing a sentence" do
      text = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
      expected = %w[Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?]

      @parser.parse(text).should == expected
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'wordlist/unique_filter'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for UniqueFilter; every example starts from a fresh filter.
describe UniqueFilter do
  before(:each) do
    @filter = UniqueFilter.new
  end

  it "should have seen words" do
    @filter.saw!('cat')

    @filter.seen?('cat').should == true
    @filter.seen?('dog').should == false
  end

  it "should only see a unique word once" do
    # The first sighting reports true, a repeat reports false.
    @filter.saw!('cat').should == true
    @filter.saw!('cat').should == false
  end

  it "should pass only unique words through the filter" do
    candidates = %w[dog cat dog]
    passed = []

    # The block is expected to run only for words not passed before.
    candidates.each do |candidate|
      @filter.pass(candidate) { |unique| passed << unique }
    end

    passed.should == ['dog', 'cat']
  end
end
|
data/tasks/spec.rb
ADDED
metadata
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wordlist
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Postmodern
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-31 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: spidr
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.1.9
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.1.12
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: hoe
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 2.3.3
|
44
|
+
version:
|
45
|
+
description: |-
|
46
|
+
A Ruby library for generating and working with word-lists. Wordlist allows
|
47
|
+
one to efficiently generate unique word-lists from arbitrary text or
|
48
|
+
other sources, such as website content. Wordlist can also quickly enumerate
|
49
|
+
through words within an existing word-list, applying multiple mutation
|
50
|
+
rules to each word in the list.
|
51
|
+
email:
|
52
|
+
- postmodern.mod3@gmail.com
|
53
|
+
executables: []
|
54
|
+
|
55
|
+
extensions: []
|
56
|
+
|
57
|
+
extra_rdoc_files:
|
58
|
+
- History.txt
|
59
|
+
- Manifest.txt
|
60
|
+
- README.txt
|
61
|
+
- scripts/text/comedy_of_errors.txt
|
62
|
+
files:
|
63
|
+
- History.txt
|
64
|
+
- Manifest.txt
|
65
|
+
- README.txt
|
66
|
+
- Rakefile
|
67
|
+
- lib/wordlist.rb
|
68
|
+
- lib/wordlist/unique_filter.rb
|
69
|
+
- lib/wordlist/parsers.rb
|
70
|
+
- lib/wordlist/builder.rb
|
71
|
+
- lib/wordlist/builders.rb
|
72
|
+
- lib/wordlist/builders/website.rb
|
73
|
+
- lib/wordlist/mutator.rb
|
74
|
+
- lib/wordlist/list.rb
|
75
|
+
- lib/wordlist/flat_file.rb
|
76
|
+
- lib/wordlist/version.rb
|
77
|
+
- tasks/spec.rb
|
78
|
+
- scripts/benchmark
|
79
|
+
- scripts/text/comedy_of_errors.txt
|
80
|
+
- spec/classes/parser_class.rb
|
81
|
+
- spec/classes/test_list.rb
|
82
|
+
- spec/text/previous_wordlist.txt
|
83
|
+
- spec/text/sample.txt
|
84
|
+
- spec/text/flat_file.txt
|
85
|
+
- spec/spec_helper.rb
|
86
|
+
- spec/unique_filter_spec.rb
|
87
|
+
- spec/parsers_spec.rb
|
88
|
+
- spec/mutator_spec.rb
|
89
|
+
- spec/builder_spec.rb
|
90
|
+
- spec/list_spec.rb
|
91
|
+
- spec/flat_file_spec.rb
|
92
|
+
- spec/wordlist_spec.rb
|
93
|
+
has_rdoc: true
|
94
|
+
homepage: http://wordlist.rubyforge.org/
|
95
|
+
licenses: []
|
96
|
+
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options:
|
99
|
+
- --main
|
100
|
+
- README.txt
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: "0"
|
108
|
+
version:
|
109
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: "0"
|
114
|
+
version:
|
115
|
+
requirements: []
|
116
|
+
|
117
|
+
rubyforge_project: wordlist
|
118
|
+
rubygems_version: 1.3.5
|
119
|
+
signing_key:
|
120
|
+
specification_version: 3
|
121
|
+
summary: A Ruby library for generating and working with word-lists
|
122
|
+
test_files: []
|
123
|
+
|