wordlist 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
require 'wordlist/builder'

require 'spec_helper'
require 'helpers/text'
require 'builder_examples'

require 'tempfile'
require 'fileutils'

# Runs the shared example group "a wordlist Builder" against two starting
# states: a freshly created temporary file, and a temporary file that has
# been pre-populated with a copy of PREVIOUS_WORDLIST.
#
# NOTE(review): the shared examples and PREVIOUS_WORDLIST are defined outside
# this file (presumably in 'builder_examples' and 'helpers/text'); they are
# assumed to read @path and @expected -- confirm against those files.
describe Builder do
  describe "new wordlist" do
    before(:all) do
      @expected = ['dog', 'cat', 'catx', 'dat']
    end

    before(:each) do
      # Keep a reference to the Tempfile itself. If only the path is kept,
      # the Tempfile object becomes garbage and its finalizer may unlink the
      # file while the example is still using @path.
      @tempfile = Tempfile.new('wordlist')
      @path = @tempfile.path
    end

    after(:each) do
      # Close and remove the temporary file deterministically.
      @tempfile.close!
    end

    it_should_behave_like "a wordlist Builder"
  end

  describe "existing wordlist" do
    before(:all) do
      @expected = ['dog', 'cat', 'log', 'catx', 'dat']
    end

    before(:each) do
      # Same reasoning as above: retain the Tempfile so the copied word-list
      # cannot disappear underneath the example.
      @tempfile = Tempfile.new('wordlist')
      @path = @tempfile.path

      # Seed the output path with the previously generated word-list.
      FileUtils.cp(PREVIOUS_WORDLIST, @path)
    end

    after(:each) do
      @tempfile.close!
    end

    it_should_behave_like "a wordlist Builder"
  end
end
@@ -0,0 +1,5 @@
# Load the mixin explicitly so this support file does not depend on some
# other spec having required the parsers first.
require 'wordlist/parsers'

# Bare host class used by the specs to exercise the Wordlist::Parsers mixin.
# It adds no behaviour of its own.
class ParserClass

  include Wordlist::Parsers

end
@@ -0,0 +1,9 @@
require 'wordlist/list'

# Stub Wordlist::List subclass for the specs: its word source is a single
# hard-coded word.
class TestList < Wordlist::List

  # Yields the one fixed word, 'omg.hackers', to the given block.
  def each_word
    yield('omg.hackers')
  end

end
@@ -0,0 +1,25 @@
require 'wordlist/flat_file'

require 'spec_helper'

# Specs for FlatFile, driven by the text/flat_file.txt fixture that sits
# next to this spec file.
describe FlatFile do
  before(:all) do
    fixture_dir = File.join(File.dirname(__FILE__), 'text')

    @path = File.join(fixture_dir, 'flat_file.txt')
    @list = FlatFile.new(@path)
  end

  it "should have a path it reads from" do
    @list.path.should == @path
  end

  it "should read the lines of the flat-file" do
    # Every word yielded must be one we still expect; crossing each one off
    # as it arrives means duplicates or unknown words fail the example.
    outstanding = ['one', 'two', 'three']

    @list.each_word do |word|
      outstanding.should include(word)
      outstanding.delete(word)
    end

    # Nothing left over: each expected word was seen.
    outstanding.should be_empty
  end
end
@@ -0,0 +1,58 @@
require 'wordlist/list'

require 'spec_helper'
require 'classes/test_list'

# Specs for List, using the TestList stub with four single-character
# mutation rules registered on it.
describe List do
  before(:all) do
    @source = TestList.new

    # Register the rules in a fixed order: o->0, a->A, e->3, s->5.
    [['o', '0'], ['a', 'A'], ['e', '3'], ['s', '5']].each do |pattern, substitute|
      @source.mutate pattern, substitute
    end
  end

  it "should iterate over each word" do
    collected = []

    @source.each_word do |word|
      collected << word
    end

    collected.should == ['omg.hackers']
  end

  it "should iterate over each unique word" do
    collected = []

    @source.each_unique do |word|
      collected << word
    end

    collected.should == ['omg.hackers']
  end

  it "should iterate over every possible mutated word" do
    # All 16 combinations of applying / not applying each of the four rules
    # to 'omg.hackers'.
    outstanding = %w[
      0mg.hAck3r5
      0mg.hAck3rs
      0mg.hAcker5
      0mg.hAckers
      0mg.hack3r5
      0mg.hack3rs
      0mg.hacker5
      0mg.hackers
      omg.hAck3r5
      omg.hAck3rs
      omg.hAcker5
      omg.hAckers
      omg.hack3r5
      omg.hack3rs
      omg.hacker5
      omg.hackers
    ]

    # Cross each mutation off as it is yielded; an unexpected or repeated
    # mutation fails the inclusion check.
    @source.each_mutation do |mutation|
      outstanding.should include(mutation)
      outstanding.delete(mutation)
    end

    outstanding.should be_empty
  end
end
@@ -0,0 +1,43 @@
require 'wordlist/mutator'

require 'spec_helper'

# Specs for Mutator: the three supported substitution kinds (byte, String,
# block) and enumeration of every substitution combination.
describe Mutator do
  it "should replace matched text with a byte" do
    # 0x41 is the byte value of 'A'.
    byte_mutator = Mutator.new('o', 0x41)

    byte_mutator.replace('o').should == 'A'
  end

  it "should replace matched text with a String" do
    string_mutator = Mutator.new('o', '0')

    string_mutator.replace('o').should == '0'
  end

  it "should replace matched text using a proc" do
    doubling_mutator = Mutator.new('o') do |match|
      match * 2
    end

    doubling_mutator.replace('o').should == 'oo'
  end

  it "should iterate over every possible substitution" do
    # 'lolol' has two 'o's, so there are four replace / keep combinations.
    outstanding = ['lolol', 'l0lol', 'lol0l', 'l0l0l']
    mutator = Mutator.new(/o/, '0')

    mutator.each('lolol') do |mutation|
      outstanding.should include(mutation)
      outstanding.delete(mutation)
    end

    outstanding.should be_empty
  end

  it "should iterate over the original word, if no matches were found" do
    mutator = Mutator.new('x', '0')
    yielded = []

    mutator.each('hello') { |mutant| yielded << mutant }

    yielded.should == ['hello']
  end
end
@@ -0,0 +1,118 @@
# Require the code under test directly, like the other spec files do, so
# this spec does not depend on another file having loaded it first.
require 'wordlist/parsers'

require 'spec_helper'
require 'classes/parser_class'

# Specs for the Parsers mixin, exercised through ParserClass. Each context
# builds its own parser once and flips a single option on it.
describe Parsers do
  describe "default" do
    before(:all) do
      @parser = ParserClass.new
    end

    it "should parse words from a sentence" do
      sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
      words = %w{The Deliverator is in touch with the road starts like a bad day stops on a peseta}

      @parser.parse(sentence).should == words
    end

    it "should ignore punctuation by default while parsing a sentence" do
      sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
      words = %w{
        Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
      }

      @parser.parse(sentence).should == words
    end

    it "should ignore URLs by default while parsing a sentence" do
      sentence = %{Click on the following link: http://www.example.com/}
      words = %w{Click on the following link}

      @parser.parse(sentence).should == words
    end

    it "should ignore short URIs by default while parsing a sentence" do
      sentence = %{Click on the following link: jabber://}
      words = %w{Click on the following link}

      @parser.parse(sentence).should == words
    end

    it "should ignore complex HTTP URLs by default while parsing a sentence" do
      sentence = %{Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N}
      words = %w{Click on the following link}

      @parser.parse(sentence).should == words
    end
  end

  describe "ignoring phone numbers" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_phone_numbers = true
    end

    it "may ignore phone numbers while parsing a sentence" do
      sentence = %{Call me before 12, 1-888-444-2222.}
      words = %w{Call me before 12}

      @parser.parse(sentence).should == words
    end

    it "may ignore long-distance phone numbers while parsing a sentence" do
      sentence = %{Call me before 12, 1-444-2222.}
      words = %w{Call me before 12}

      @parser.parse(sentence).should == words
    end

    it "may ignore short phone numbers while parsing a sentence" do
      sentence = %{Call me before 12, 444-2222.}
      words = %w{Call me before 12}

      @parser.parse(sentence).should == words
    end
  end

  describe "ignoring references" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_references = true
    end

    it "may ignore RFC style references while parsing a sentence" do
      sentence = %{As one can see, it has failed [1].}
      words = %w{As one can see it has failed}

      @parser.parse(sentence).should == words
    end
  end

  describe "ignoring case" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_case = true
    end

    it "may ignore case while parsing a sentence" do
      sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
      words = %w{the deliverator is in touch with the road starts like a bad day stops on a peseta}

      # ignore_case is already enabled by the before(:all) hook above.
      @parser.parse(sentence).should == words
    end
  end

  describe "preserving punctuation" do
    before(:all) do
      @parser = ParserClass.new
      @parser.ignore_punctuation = false
    end

    it "may preserve punctuation while parsing a sentence" do
      sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
      words = %w{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}

      @parser.parse(sentence).should == words
    end
  end
end
@@ -0,0 +1,7 @@
# Shared setup required by every spec file.

# Activate RSpec 1.x (1.1.12 or newer) through RubyGems, then load it.
require 'rubygems'
gem 'rspec', '>=1.1.12'
require 'spec'

# Loading the version file is what defines the Wordlist namespace here.
require 'wordlist/version'

# Mix Wordlist into the top level so specs can refer to its constants
# (Builder, List, Mutator, ...) without the "Wordlist::" prefix.
include Wordlist
@@ -0,0 +1,3 @@
1
+ one
2
+ two
3
+ three
@@ -0,0 +1,3 @@
1
+ cat
2
+ dog
3
+ log
@@ -0,0 +1,3 @@
1
+ dog cat, cat dat catx.
2
+ dog, dog, dog,
3
+ cat. dog, dat.
@@ -0,0 +1,34 @@
require 'wordlist/unique_filter'

require 'spec_helper'

# Specs for UniqueFilter. A fresh filter is built for every example so that
# words seen in one example never leak into another.
describe UniqueFilter do
  before(:each) do
    @filter = UniqueFilter.new
  end

  it "should have seen words" do
    @filter.saw!('cat')

    # Only the recorded word counts as seen.
    @filter.seen?('cat').should == true
    @filter.seen?('dog').should == false
  end

  it "should only see a unique word once" do
    first_sighting = @filter.saw!('cat')
    second_sighting = @filter.saw!('cat')

    first_sighting.should == true
    second_sighting.should == false
  end

  it "should pass only unique words through the filter" do
    passed = []

    # 'dog' appears twice in the input but must reach the block only once.
    ['dog', 'cat', 'dog'].each do |word|
      @filter.pass(word) { |result| passed << result }
    end

    passed.should == ['dog', 'cat']
  end
end
@@ -0,0 +1,9 @@
require 'wordlist/version'

require 'spec_helper'

# Sanity check on the top-level Wordlist namespace.
describe Wordlist do
  it "should have a VERSION constant" do
    version_defined = Wordlist.const_defined?('VERSION')

    version_defined.should == true
  end
end
@@ -0,0 +1,9 @@
require 'spec/rake/spectask'

# Defines the :spec Rake task (RSpec 1.x) and makes it the default task.
desc "Run all specifications"
Spec::Rake::SpecTask.new(:spec) do |spec_task|
  # Put both the library and the spec support files on the load path.
  spec_task.libs = spec_task.libs + ['lib', 'spec']

  # Coloured, specdoc-formatted output.
  spec_task.spec_opts = ['--colour', '--format', 'specdoc']
end

task :default => :spec
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wordlist
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Postmodern
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-31 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: spidr
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.1.9
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.1.12
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hoe
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 2.3.3
44
+ version:
45
+ description: |-
46
+ A Ruby library for generating and working with word-lists. Wordlist allows
47
+ one to efficiently generate unique word-lists from arbitrary text or
48
+ other sources, such as website content. Wordlist can also quickly enumerate
49
+ through words within an existing word-list, applying multiple mutation
50
+ rules to each word in the list.
51
+ email:
52
+ - postmodern.mod3@gmail.com
53
+ executables: []
54
+
55
+ extensions: []
56
+
57
+ extra_rdoc_files:
58
+ - History.txt
59
+ - Manifest.txt
60
+ - README.txt
61
+ - scripts/text/comedy_of_errors.txt
62
+ files:
63
+ - History.txt
64
+ - Manifest.txt
65
+ - README.txt
66
+ - Rakefile
67
+ - lib/wordlist.rb
68
+ - lib/wordlist/unique_filter.rb
69
+ - lib/wordlist/parsers.rb
70
+ - lib/wordlist/builder.rb
71
+ - lib/wordlist/builders.rb
72
+ - lib/wordlist/builders/website.rb
73
+ - lib/wordlist/mutator.rb
74
+ - lib/wordlist/list.rb
75
+ - lib/wordlist/flat_file.rb
76
+ - lib/wordlist/version.rb
77
+ - tasks/spec.rb
78
+ - scripts/benchmark
79
+ - scripts/text/comedy_of_errors.txt
80
+ - spec/classes/parser_class.rb
81
+ - spec/classes/test_list.rb
82
+ - spec/text/previous_wordlist.txt
83
+ - spec/text/sample.txt
84
+ - spec/text/flat_file.txt
85
+ - spec/spec_helper.rb
86
+ - spec/unique_filter_spec.rb
87
+ - spec/parsers_spec.rb
88
+ - spec/mutator_spec.rb
89
+ - spec/builder_spec.rb
90
+ - spec/list_spec.rb
91
+ - spec/flat_file_spec.rb
92
+ - spec/wordlist_spec.rb
93
+ has_rdoc: true
94
+ homepage: http://wordlist.rubyforge.org/
95
+ licenses: []
96
+
97
+ post_install_message:
98
+ rdoc_options:
99
+ - --main
100
+ - README.txt
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: "0"
108
+ version:
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: "0"
114
+ version:
115
+ requirements: []
116
+
117
+ rubyforge_project: wordlist
118
+ rubygems_version: 1.3.5
119
+ signing_key:
120
+ specification_version: 3
121
+ summary: A Ruby library for generating and working with word-lists
122
+ test_files: []
123
+