wordlist 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/Manifest.txt +30 -0
- data/README.txt +103 -0
- data/Rakefile +22 -0
- data/lib/wordlist.rb +4 -0
- data/lib/wordlist/builder.rb +128 -0
- data/lib/wordlist/builders.rb +1 -0
- data/lib/wordlist/builders/website.rb +44 -0
- data/lib/wordlist/flat_file.rb +36 -0
- data/lib/wordlist/list.rb +131 -0
- data/lib/wordlist/mutator.rb +84 -0
- data/lib/wordlist/parsers.rb +69 -0
- data/lib/wordlist/unique_filter.rb +73 -0
- data/lib/wordlist/version.rb +4 -0
- data/scripts/benchmark +18 -0
- data/scripts/text/comedy_of_errors.txt +4011 -0
- data/spec/builder_spec.rb +36 -0
- data/spec/classes/parser_class.rb +5 -0
- data/spec/classes/test_list.rb +9 -0
- data/spec/flat_file_spec.rb +25 -0
- data/spec/list_spec.rb +58 -0
- data/spec/mutator_spec.rb +43 -0
- data/spec/parsers_spec.rb +118 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text/flat_file.txt +3 -0
- data/spec/text/previous_wordlist.txt +3 -0
- data/spec/text/sample.txt +3 -0
- data/spec/unique_filter_spec.rb +34 -0
- data/spec/wordlist_spec.rb +9 -0
- data/tasks/spec.rb +9 -0
- metadata +123 -0
data/spec/builder_spec.rb
ADDED
@@ -0,0 +1,36 @@
+require 'wordlist/builder'
+
+require 'spec_helper'
+require 'helpers/text'
+require 'builder_examples'
+
+require 'tempfile'
+require 'fileutils'
+
+describe Builder do
+  describe "new wordlist" do
+    before(:all) do
+      @expected = ['dog', 'cat', 'catx', 'dat']
+    end
+
+    before(:each) do
+      @path = Tempfile.new('wordlist').path
+    end
+
+    it_should_behave_like "a wordlist Builder"
+  end
+
+  describe "existing wordlist" do
+    before(:all) do
+      @path = '/tmp/bla'
+      @expected = ['dog', 'cat', 'log', 'catx', 'dat']
+    end
+
+    before(:each) do
+      @path = Tempfile.new('wordlist').path
+      FileUtils.cp(PREVIOUS_WORDLIST,@path)
+    end
+
+    it_should_behave_like "a wordlist Builder"
+  end
+end
data/spec/flat_file_spec.rb
ADDED
@@ -0,0 +1,25 @@
+require 'wordlist/flat_file'
+
+require 'spec_helper'
+
+describe FlatFile do
+  before(:all) do
+    @path = File.join(File.dirname(__FILE__),'text','flat_file.txt')
+    @list = FlatFile.new(@path)
+  end
+
+  it "should have a path it reads from" do
+    @list.path.should == @path
+  end
+
+  it "should read the lines of the flat-file" do
+    words = ['one', 'two', 'three']
+
+    @list.each_word do |word|
+      words.include?(word).should == true
+      words.delete(word)
+    end
+
+    words.should == []
+  end
+end
data/spec/list_spec.rb
ADDED
@@ -0,0 +1,58 @@
+require 'wordlist/list'
+
+require 'spec_helper'
+require 'classes/test_list'
+
+describe List do
+  before(:all) do
+    @source = TestList.new
+    @source.mutate 'o', '0'
+    @source.mutate 'a', 'A'
+    @source.mutate 'e', '3'
+    @source.mutate 's', '5'
+  end
+
+  it "should iterate over each word" do
+    words = []
+
+    @source.each_word { |word| words << word }
+
+    words.should == ['omg.hackers']
+  end
+
+  it "should iterate over each unique word" do
+    words = []
+
+    @source.each_unique { |word| words << word }
+
+    words.should == ['omg.hackers']
+  end
+
+  it "should iterate over every possible mutated word" do
+    mutations = %w{
+      0mg.hAck3r5
+      0mg.hAck3rs
+      0mg.hAcker5
+      0mg.hAckers
+      0mg.hack3r5
+      0mg.hack3rs
+      0mg.hacker5
+      0mg.hackers
+      omg.hAck3r5
+      omg.hAck3rs
+      omg.hAcker5
+      omg.hAckers
+      omg.hack3r5
+      omg.hack3rs
+      omg.hacker5
+      omg.hackers
+    }
+
+    @source.each_mutation do |mutation|
+      mutations.include?(mutation).should == true
+      mutations.delete(mutation)
+    end
+
+    mutations.should == []
+  end
+end
data/spec/mutator_spec.rb
ADDED
@@ -0,0 +1,43 @@
+require 'wordlist/mutator'
+
+require 'spec_helper'
+
+describe Mutator do
+  it "should replace matched text with a byte" do
+    mutator = Mutator.new('o',0x41)
+    mutator.replace('o').should == 'A'
+  end
+
+  it "should replace matched text with a String" do
+    mutator = Mutator.new('o','0')
+    mutator.replace('o').should == '0'
+  end
+
+  it "should replace matched text using a proc" do
+    mutator = Mutator.new('o') { |match| match * 2 }
+    mutator.replace('o').should == 'oo'
+  end
+
+  it "should iterate over every possible substitution" do
+    remaining = ['lolol', 'l0lol', 'lol0l', 'l0l0l']
+
+    mutator = Mutator.new(/o/,'0')
+    mutator.each('lolol') do |mutation|
+      remaining.include?(mutation).should == true
+      remaining.delete(mutation)
+    end
+
+    remaining.should == []
+  end
+
+  it "should iterate over the original word, if no matches were found" do
+    mutations = []
+    mutator = Mutator.new('x','0')
+
+    mutator.each('hello') do |mutant|
+      mutations << mutant
+    end
+
+    mutations.should == ['hello']
+  end
+end
data/spec/parsers_spec.rb
ADDED
@@ -0,0 +1,118 @@
+require 'spec_helper'
+require 'classes/parser_class'
+
+describe Parsers do
+  describe "default" do
+    before(:all) do
+      @parser = ParserClass.new
+    end
+
+    it "should parse words from a sentence" do
+      sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
+      words = %w{The Deliverator is in touch with the road starts like a bad day stops on a peseta}
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "should ignore punctuation by default while parsing a sentence" do
+      sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
+      words = %w{
+        Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
+      }
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "should ignore URLs by default while parsing a sentence" do
+      sentence = %{Click on the following link: http://www.example.com/}
+      words = %w{Click on the following link}
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "should ignore short URIs by default while parsing a sentence" do
+      sentence = %{Click on the following link: jabber://}
+      words = %w{Click on the following link}
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "should ignore complex HTTP URLs by default while parsing a sentence" do
+      sentence = %{Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N}
+      words = %w{Click on the following link}
+
+      @parser.parse(sentence).should == words
+    end
+  end
+
+  describe "ignoring phone numbers" do
+    before(:all) do
+      @parser = ParserClass.new
+      @parser.ignore_phone_numbers = true
+    end
+
+    it "may ignore phone numbers while parsing a sentence" do
+      sentence = %{Call me before 12, 1-888-444-2222.}
+      words = %w{Call me before 12}
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "may ignore long-distance phone numbers while parsing a sentence" do
+      sentence = %{Call me before 12, 1-444-2222.}
+      words = %w{Call me before 12}
+
+      @parser.parse(sentence).should == words
+    end
+
+    it "may ignore short phone numbers while parsing a sentence" do
+      sentence = %{Call me before 12, 444-2222.}
+      words = %w{Call me before 12}
+
+      @parser.parse(sentence).should == words
+    end
+  end
+
+  describe "ignoring references" do
+    before(:all) do
+      @parser = ParserClass.new
+      @parser.ignore_references = true
+    end
+
+    it "may ignore RFC style references while parsing a sentence" do
+      sentence = %{As one can see, it has failed [1].}
+      words = %w{As one can see it has failed}
+
+      @parser.parse(sentence).should == words
+    end
+  end
+
+  describe "ignoring case" do
+    before(:all) do
+      @parser = ParserClass.new
+      @parser.ignore_case = true
+    end
+
+    it "may ignore case while parsing a sentence" do
+      sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
+      words = %w{the deliverator is in touch with the road starts like a bad day stops on a peseta}
+
+      @parser.ignore_case = true
+      @parser.parse(sentence).should == words
+    end
+  end
+
+  describe "preserving punctuation" do
+    before(:all) do
+      @parser = ParserClass.new
+      @parser.ignore_punctuation = false
+    end
+
+    it "may preserve punctuation while parsing a sentence" do
+      sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
+      words = %w{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
+
+      @parser.parse(sentence).should == words
+    end
+  end
+end
data/spec/spec_helper.rb
ADDED
data/spec/unique_filter_spec.rb
ADDED
@@ -0,0 +1,34 @@
+require 'wordlist/unique_filter'
+
+require 'spec_helper'
+
+describe UniqueFilter do
+  before(:each) do
+    @filter = UniqueFilter.new
+  end
+
+  it "should have seen words" do
+    @filter.saw!('cat')
+
+    @filter.seen?('cat').should == true
+    @filter.seen?('dog').should == false
+  end
+
+  it "should only see a unique word once" do
+    @filter.saw!('cat').should == true
+    @filter.saw!('cat').should == false
+  end
+
+  it "should pass only unique words through the filter" do
+    input = ['dog', 'cat', 'dog']
+    output = []
+
+    input.each do |word|
+      @filter.pass(word) do |result|
+        output << result
+      end
+    end
+
+    output.should == ['dog', 'cat']
+  end
+end
data/tasks/spec.rb
ADDED
metadata
ADDED
@@ -0,0 +1,123 @@
+--- !ruby/object:Gem::Specification
+name: wordlist
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Postmodern
+autorequire:
+bindir: bin
+cert_chain: []
+
+date: 2009-08-31 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: spidr
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.1.9
+    version:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.1.12
+    version:
+- !ruby/object:Gem::Dependency
+  name: hoe
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.3.3
+    version:
+description: |-
+  A Ruby library for generating and working with word-lists. Wordlist allows
+  one to efficiently generate unique word-lists from arbitrary text or
+  other sources, such as website content. Wordlist can also quickly enumerate
+  through words within an existing word-list, applying multiple mutation
+  rules to each word in the list.
+email:
+- postmodern.mod3@gmail.com
+executables: []
+
+extensions: []
+
+extra_rdoc_files:
+- History.txt
+- Manifest.txt
+- README.txt
+- scripts/text/comedy_of_errors.txt
+files:
+- History.txt
+- Manifest.txt
+- README.txt
+- Rakefile
+- lib/wordlist.rb
+- lib/wordlist/unique_filter.rb
+- lib/wordlist/parsers.rb
+- lib/wordlist/builder.rb
+- lib/wordlist/builders.rb
+- lib/wordlist/builders/website.rb
+- lib/wordlist/mutator.rb
+- lib/wordlist/list.rb
+- lib/wordlist/flat_file.rb
+- lib/wordlist/version.rb
+- tasks/spec.rb
+- scripts/benchmark
+- scripts/text/comedy_of_errors.txt
+- spec/classes/parser_class.rb
+- spec/classes/test_list.rb
+- spec/text/previous_wordlist.txt
+- spec/text/sample.txt
+- spec/text/flat_file.txt
+- spec/spec_helper.rb
+- spec/unique_filter_spec.rb
+- spec/parsers_spec.rb
+- spec/mutator_spec.rb
+- spec/builder_spec.rb
+- spec/list_spec.rb
+- spec/flat_file_spec.rb
+- spec/wordlist_spec.rb
+has_rdoc: true
+homepage: http://wordlist.rubyforge.org/
+licenses: []
+
+post_install_message:
+rdoc_options:
+- --main
+- README.txt
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+
+rubyforge_project: wordlist
+rubygems_version: 1.3.5
+signing_key:
+specification_version: 3
+summary: A Ruby library for generating and working with word-lists
+test_files: []
+