wordlist 0.1.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +6 -3
- data/ChangeLog.md +45 -1
- data/Gemfile +13 -0
- data/LICENSE.txt +1 -3
- data/README.md +266 -61
- data/Rakefile +7 -32
- data/benchmarks.rb +115 -0
- data/bin/wordlist +4 -7
- data/data/stop_words/ar.txt +104 -0
- data/data/stop_words/bg.txt +259 -0
- data/data/stop_words/bn.txt +363 -0
- data/data/stop_words/ca.txt +126 -0
- data/data/stop_words/cs.txt +138 -0
- data/data/stop_words/da.txt +101 -0
- data/data/stop_words/de.txt +129 -0
- data/data/stop_words/el.txt +79 -0
- data/data/stop_words/en.txt +175 -0
- data/data/stop_words/es.txt +178 -0
- data/data/stop_words/eu.txt +98 -0
- data/data/stop_words/fa.txt +332 -0
- data/data/stop_words/fi.txt +747 -0
- data/data/stop_words/fr.txt +116 -0
- data/data/stop_words/ga.txt +109 -0
- data/data/stop_words/gl.txt +160 -0
- data/data/stop_words/he.txt +499 -0
- data/data/stop_words/hi.txt +97 -0
- data/data/stop_words/hr.txt +179 -0
- data/data/stop_words/hu.txt +35 -0
- data/data/stop_words/hy.txt +45 -0
- data/data/stop_words/id.txt +357 -0
- data/data/stop_words/it.txt +134 -0
- data/data/stop_words/ja.txt +44 -0
- data/data/stop_words/ko.txt +677 -0
- data/data/stop_words/ku.txt +63 -0
- data/data/stop_words/lt.txt +507 -0
- data/data/stop_words/lv.txt +163 -0
- data/data/stop_words/mr.txt +99 -0
- data/data/stop_words/nl.txt +48 -0
- data/data/stop_words/no.txt +172 -0
- data/data/stop_words/pl.txt +138 -0
- data/data/stop_words/pt.txt +147 -0
- data/data/stop_words/ro.txt +281 -0
- data/data/stop_words/ru.txt +421 -0
- data/data/stop_words/sk.txt +173 -0
- data/data/stop_words/sv.txt +386 -0
- data/data/stop_words/th.txt +115 -0
- data/data/stop_words/tr.txt +114 -0
- data/data/stop_words/uk.txt +28 -0
- data/data/stop_words/ur.txt +513 -0
- data/data/stop_words/zh.txt +125 -0
- data/gemspec.yml +4 -10
- data/lib/wordlist/abstract_wordlist.rb +24 -0
- data/lib/wordlist/builder.rb +170 -138
- data/lib/wordlist/cli.rb +458 -0
- data/lib/wordlist/compression/reader.rb +72 -0
- data/lib/wordlist/compression/writer.rb +80 -0
- data/lib/wordlist/exceptions.rb +31 -0
- data/lib/wordlist/file.rb +176 -0
- data/lib/wordlist/format.rb +38 -0
- data/lib/wordlist/lexer/lang.rb +32 -0
- data/lib/wordlist/lexer/stop_words.rb +68 -0
- data/lib/wordlist/lexer.rb +218 -0
- data/lib/wordlist/list_methods.rb +462 -0
- data/lib/wordlist/modifiers/capitalize.rb +45 -0
- data/lib/wordlist/modifiers/downcase.rb +45 -0
- data/lib/wordlist/modifiers/gsub.rb +51 -0
- data/lib/wordlist/modifiers/modifier.rb +44 -0
- data/lib/wordlist/modifiers/mutate.rb +133 -0
- data/lib/wordlist/modifiers/mutate_case.rb +25 -0
- data/lib/wordlist/modifiers/sub.rb +97 -0
- data/lib/wordlist/modifiers/tr.rb +71 -0
- data/lib/wordlist/modifiers/upcase.rb +45 -0
- data/lib/wordlist/modifiers.rb +8 -0
- data/lib/wordlist/operators/binary_operator.rb +38 -0
- data/lib/wordlist/operators/concat.rb +47 -0
- data/lib/wordlist/operators/intersect.rb +55 -0
- data/lib/wordlist/operators/operator.rb +29 -0
- data/lib/wordlist/operators/power.rb +72 -0
- data/lib/wordlist/operators/product.rb +50 -0
- data/lib/wordlist/operators/subtract.rb +54 -0
- data/lib/wordlist/operators/unary_operator.rb +29 -0
- data/lib/wordlist/operators/union.rb +61 -0
- data/lib/wordlist/operators/unique.rb +52 -0
- data/lib/wordlist/operators.rb +7 -0
- data/lib/wordlist/unique_filter.rb +40 -61
- data/lib/wordlist/version.rb +1 -1
- data/lib/wordlist/words.rb +71 -0
- data/lib/wordlist.rb +103 -2
- data/spec/abstract_list_spec.rb +18 -0
- data/spec/builder_spec.rb +220 -76
- data/spec/cli_spec.rb +801 -0
- data/spec/compression/reader_spec.rb +137 -0
- data/spec/compression/writer_spec.rb +194 -0
- data/spec/file_spec.rb +258 -0
- data/spec/fixtures/wordlist.txt +15 -0
- data/spec/fixtures/wordlist.txt.bz2 +0 -0
- data/spec/fixtures/wordlist.txt.gz +0 -0
- data/spec/fixtures/wordlist.txt.xz +0 -0
- data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
- data/spec/fixtures/wordlist_with_comments.txt +19 -0
- data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
- data/spec/format_spec.rb +50 -0
- data/spec/helpers/text.rb +3 -3
- data/spec/helpers/wordlist.rb +2 -2
- data/spec/lexer/lang_spec.rb +70 -0
- data/spec/lexer/stop_words_spec.rb +77 -0
- data/spec/lexer_spec.rb +652 -0
- data/spec/list_methods_spec.rb +181 -0
- data/spec/modifiers/capitalize_spec.rb +27 -0
- data/spec/modifiers/downcase_spec.rb +27 -0
- data/spec/modifiers/gsub_spec.rb +59 -0
- data/spec/modifiers/modifier_spec.rb +20 -0
- data/spec/modifiers/mutate_case_spec.rb +46 -0
- data/spec/modifiers/mutate_spec.rb +39 -0
- data/spec/modifiers/sub_spec.rb +98 -0
- data/spec/modifiers/tr_spec.rb +46 -0
- data/spec/modifiers/upcase_spec.rb +27 -0
- data/spec/operators/binary_operator_spec.rb +19 -0
- data/spec/operators/concat_spec.rb +26 -0
- data/spec/operators/intersect_spec.rb +37 -0
- data/spec/operators/operator_spec.rb +16 -0
- data/spec/operators/power_spec.rb +57 -0
- data/spec/operators/product_spec.rb +39 -0
- data/spec/operators/subtract_spec.rb +37 -0
- data/spec/operators/union_spec.rb +37 -0
- data/spec/operators/unique_spec.rb +25 -0
- data/spec/spec_helper.rb +2 -1
- data/spec/unique_filter_spec.rb +108 -18
- data/spec/wordlist_spec.rb +55 -3
- data/spec/words_spec.rb +41 -0
- metadata +183 -120
- data/lib/wordlist/builders/website.rb +0 -216
- data/lib/wordlist/builders.rb +0 -1
- data/lib/wordlist/flat_file.rb +0 -47
- data/lib/wordlist/list.rb +0 -162
- data/lib/wordlist/mutator.rb +0 -113
- data/lib/wordlist/parsers.rb +0 -74
- data/lib/wordlist/runners/list.rb +0 -116
- data/lib/wordlist/runners/runner.rb +0 -67
- data/lib/wordlist/runners.rb +0 -2
- data/scripts/benchmark +0 -59
- data/scripts/text/comedy_of_errors.txt +0 -4011
- data/spec/flat_file_spec.rb +0 -25
- data/spec/list_spec.rb +0 -58
- data/spec/mutator_spec.rb +0 -43
- data/spec/parsers_spec.rb +0 -118
data/spec/flat_file_spec.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
require 'wordlist/flat_file'
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
|
-
describe FlatFile do
|
6
|
-
before(:all) do
|
7
|
-
@path = File.join(File.dirname(__FILE__),'text','flat_file.txt')
|
8
|
-
@list = FlatFile.new(@path)
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should have a path it reads from" do
|
12
|
-
@list.path.should == @path
|
13
|
-
end
|
14
|
-
|
15
|
-
it "should read the lines of the flat-file" do
|
16
|
-
words = ['one', 'two', 'three']
|
17
|
-
|
18
|
-
@list.each_word do |word|
|
19
|
-
words.include?(word).should == true
|
20
|
-
words.delete(word)
|
21
|
-
end
|
22
|
-
|
23
|
-
words.should == []
|
24
|
-
end
|
25
|
-
end
|
data/spec/list_spec.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'wordlist/list'
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
require 'classes/test_list'
|
5
|
-
|
6
|
-
describe List do
|
7
|
-
before(:all) do
|
8
|
-
@source = TestList.new
|
9
|
-
@source.mutate 'o', '0'
|
10
|
-
@source.mutate 'a', 'A'
|
11
|
-
@source.mutate 'e', '3'
|
12
|
-
@source.mutate 's', '5'
|
13
|
-
end
|
14
|
-
|
15
|
-
it "should iterate over each word" do
|
16
|
-
words = []
|
17
|
-
|
18
|
-
@source.each_word { |word| words << word }
|
19
|
-
|
20
|
-
words.should == ['omg.hackers']
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should iterate over each unique word" do
|
24
|
-
words = []
|
25
|
-
|
26
|
-
@source.each_unique { |word| words << word }
|
27
|
-
|
28
|
-
words.should == ['omg.hackers']
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should iterate over every possible mutated word" do
|
32
|
-
mutations = %w{
|
33
|
-
0mg.hAck3r5
|
34
|
-
0mg.hAck3rs
|
35
|
-
0mg.hAcker5
|
36
|
-
0mg.hAckers
|
37
|
-
0mg.hack3r5
|
38
|
-
0mg.hack3rs
|
39
|
-
0mg.hacker5
|
40
|
-
0mg.hackers
|
41
|
-
omg.hAck3r5
|
42
|
-
omg.hAck3rs
|
43
|
-
omg.hAcker5
|
44
|
-
omg.hAckers
|
45
|
-
omg.hack3r5
|
46
|
-
omg.hack3rs
|
47
|
-
omg.hacker5
|
48
|
-
omg.hackers
|
49
|
-
}
|
50
|
-
|
51
|
-
@source.each_mutation do |mutation|
|
52
|
-
mutations.include?(mutation).should == true
|
53
|
-
mutations.delete(mutation)
|
54
|
-
end
|
55
|
-
|
56
|
-
mutations.should == []
|
57
|
-
end
|
58
|
-
end
|
data/spec/mutator_spec.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
require 'wordlist/mutator'
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
|
-
describe Mutator do
|
6
|
-
it "should replace matched text with a byte" do
|
7
|
-
mutator = Mutator.new('o',0x41)
|
8
|
-
mutator.replace('o').should == 'A'
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should replace matched text with a String" do
|
12
|
-
mutator = Mutator.new('o','0')
|
13
|
-
mutator.replace('o').should == '0'
|
14
|
-
end
|
15
|
-
|
16
|
-
it "should replace matched text using a proc" do
|
17
|
-
mutator = Mutator.new('o') { |match| match * 2 }
|
18
|
-
mutator.replace('o').should == 'oo'
|
19
|
-
end
|
20
|
-
|
21
|
-
it "should iterate over every possible substitution" do
|
22
|
-
remaining = ['lolol', 'l0lol', 'lol0l', 'l0l0l']
|
23
|
-
|
24
|
-
mutator = Mutator.new(/o/,'0')
|
25
|
-
mutator.each('lolol') do |mutation|
|
26
|
-
remaining.include?(mutation).should == true
|
27
|
-
remaining.delete(mutation)
|
28
|
-
end
|
29
|
-
|
30
|
-
remaining.should == []
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should iterate over the original word, if no matches were found" do
|
34
|
-
mutations = []
|
35
|
-
mutator = Mutator.new('x','0')
|
36
|
-
|
37
|
-
mutator.each('hello') do |mutant|
|
38
|
-
mutations << mutant
|
39
|
-
end
|
40
|
-
|
41
|
-
mutations.should == ['hello']
|
42
|
-
end
|
43
|
-
end
|
data/spec/parsers_spec.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'classes/parser_class'
|
3
|
-
|
4
|
-
describe Parsers do
|
5
|
-
describe "default" do
|
6
|
-
before(:all) do
|
7
|
-
@parser = ParserClass.new
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should parse words from a sentence" do
|
11
|
-
sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
|
12
|
-
words = %w{The Deliverator is in touch with the road starts like a bad day stops on a peseta}
|
13
|
-
|
14
|
-
@parser.parse(sentence).should == words
|
15
|
-
end
|
16
|
-
|
17
|
-
it "should ignore punctuation by default while parsing a sentence" do
|
18
|
-
sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
19
|
-
words = %w{
|
20
|
-
Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
|
21
|
-
}
|
22
|
-
|
23
|
-
@parser.parse(sentence).should == words
|
24
|
-
end
|
25
|
-
|
26
|
-
it "should ignore URLs by default while parsing a sentence" do
|
27
|
-
sentence = %{Click on the following link: http://www.example.com/}
|
28
|
-
words = %w{Click on the following link}
|
29
|
-
|
30
|
-
@parser.parse(sentence).should == words
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should ignore short URIs by default while parsing a sentence" do
|
34
|
-
sentence = %{Click on the following link: jabber://}
|
35
|
-
words = %w{Click on the following link}
|
36
|
-
|
37
|
-
@parser.parse(sentence).should == words
|
38
|
-
end
|
39
|
-
|
40
|
-
it "should ignore complex HTTP URLs by default while parsing a sentence" do
|
41
|
-
sentence = %{Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N}
|
42
|
-
words = %w{Click on the following link}
|
43
|
-
|
44
|
-
@parser.parse(sentence).should == words
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
describe "ignoring phone numbers" do
|
49
|
-
before(:all) do
|
50
|
-
@parser = ParserClass.new
|
51
|
-
@parser.ignore_phone_numbers = true
|
52
|
-
end
|
53
|
-
|
54
|
-
it "may ignore phone numbers while parsing a sentence" do
|
55
|
-
sentence = %{Call me before 12, 1-888-444-2222.}
|
56
|
-
words = %w{Call me before 12}
|
57
|
-
|
58
|
-
@parser.parse(sentence).should == words
|
59
|
-
end
|
60
|
-
|
61
|
-
it "may ignore long-distance phone numbers while parsing a sentence" do
|
62
|
-
sentence = %{Call me before 12, 1-444-2222.}
|
63
|
-
words = %w{Call me before 12}
|
64
|
-
|
65
|
-
@parser.parse(sentence).should == words
|
66
|
-
end
|
67
|
-
|
68
|
-
it "may ignore short phone numbers while parsing a sentence" do
|
69
|
-
sentence = %{Call me before 12, 444-2222.}
|
70
|
-
words = %w{Call me before 12}
|
71
|
-
|
72
|
-
@parser.parse(sentence).should == words
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
describe "ignoring references" do
|
77
|
-
before(:all) do
|
78
|
-
@parser = ParserClass.new
|
79
|
-
@parser.ignore_references = true
|
80
|
-
end
|
81
|
-
|
82
|
-
it "may ignore RFC style references while parsing a sentence" do
|
83
|
-
sentence = %{As one can see, it has failed [1].}
|
84
|
-
words = %w{As one can see it has failed}
|
85
|
-
|
86
|
-
@parser.parse(sentence).should == words
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
describe "ignoring case" do
|
91
|
-
before(:all) do
|
92
|
-
@parser = ParserClass.new
|
93
|
-
@parser.ignore_case = true
|
94
|
-
end
|
95
|
-
|
96
|
-
it "may ignore case while parsing a sentence" do
|
97
|
-
sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
|
98
|
-
words = %w{the deliverator is in touch with the road starts like a bad day stops on a peseta}
|
99
|
-
|
100
|
-
@parser.ignore_case = true
|
101
|
-
@parser.parse(sentence).should == words
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
describe "preserving punctuation" do
|
106
|
-
before(:all) do
|
107
|
-
@parser = ParserClass.new
|
108
|
-
@parser.ignore_punctuation = false
|
109
|
-
end
|
110
|
-
|
111
|
-
it "may preserve punctuation while parsing a sentence" do
|
112
|
-
sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
113
|
-
words = %w{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
114
|
-
|
115
|
-
@parser.parse(sentence).should == words
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|