wordlist 0.1.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +27 -0
  3. data/.gitignore +6 -3
  4. data/ChangeLog.md +45 -1
  5. data/Gemfile +13 -0
  6. data/LICENSE.txt +1 -3
  7. data/README.md +266 -61
  8. data/Rakefile +7 -32
  9. data/benchmarks.rb +115 -0
  10. data/bin/wordlist +4 -7
  11. data/data/stop_words/ar.txt +104 -0
  12. data/data/stop_words/bg.txt +259 -0
  13. data/data/stop_words/bn.txt +363 -0
  14. data/data/stop_words/ca.txt +126 -0
  15. data/data/stop_words/cs.txt +138 -0
  16. data/data/stop_words/da.txt +101 -0
  17. data/data/stop_words/de.txt +129 -0
  18. data/data/stop_words/el.txt +79 -0
  19. data/data/stop_words/en.txt +175 -0
  20. data/data/stop_words/es.txt +178 -0
  21. data/data/stop_words/eu.txt +98 -0
  22. data/data/stop_words/fa.txt +332 -0
  23. data/data/stop_words/fi.txt +747 -0
  24. data/data/stop_words/fr.txt +116 -0
  25. data/data/stop_words/ga.txt +109 -0
  26. data/data/stop_words/gl.txt +160 -0
  27. data/data/stop_words/he.txt +499 -0
  28. data/data/stop_words/hi.txt +97 -0
  29. data/data/stop_words/hr.txt +179 -0
  30. data/data/stop_words/hu.txt +35 -0
  31. data/data/stop_words/hy.txt +45 -0
  32. data/data/stop_words/id.txt +357 -0
  33. data/data/stop_words/it.txt +134 -0
  34. data/data/stop_words/ja.txt +44 -0
  35. data/data/stop_words/ko.txt +677 -0
  36. data/data/stop_words/ku.txt +63 -0
  37. data/data/stop_words/lt.txt +507 -0
  38. data/data/stop_words/lv.txt +163 -0
  39. data/data/stop_words/mr.txt +99 -0
  40. data/data/stop_words/nl.txt +48 -0
  41. data/data/stop_words/no.txt +172 -0
  42. data/data/stop_words/pl.txt +138 -0
  43. data/data/stop_words/pt.txt +147 -0
  44. data/data/stop_words/ro.txt +281 -0
  45. data/data/stop_words/ru.txt +421 -0
  46. data/data/stop_words/sk.txt +173 -0
  47. data/data/stop_words/sv.txt +386 -0
  48. data/data/stop_words/th.txt +115 -0
  49. data/data/stop_words/tr.txt +114 -0
  50. data/data/stop_words/uk.txt +28 -0
  51. data/data/stop_words/ur.txt +513 -0
  52. data/data/stop_words/zh.txt +125 -0
  53. data/gemspec.yml +4 -10
  54. data/lib/wordlist/abstract_wordlist.rb +24 -0
  55. data/lib/wordlist/builder.rb +170 -138
  56. data/lib/wordlist/cli.rb +458 -0
  57. data/lib/wordlist/compression/reader.rb +72 -0
  58. data/lib/wordlist/compression/writer.rb +80 -0
  59. data/lib/wordlist/exceptions.rb +31 -0
  60. data/lib/wordlist/file.rb +176 -0
  61. data/lib/wordlist/format.rb +38 -0
  62. data/lib/wordlist/lexer/lang.rb +32 -0
  63. data/lib/wordlist/lexer/stop_words.rb +68 -0
  64. data/lib/wordlist/lexer.rb +218 -0
  65. data/lib/wordlist/list_methods.rb +462 -0
  66. data/lib/wordlist/modifiers/capitalize.rb +45 -0
  67. data/lib/wordlist/modifiers/downcase.rb +45 -0
  68. data/lib/wordlist/modifiers/gsub.rb +51 -0
  69. data/lib/wordlist/modifiers/modifier.rb +44 -0
  70. data/lib/wordlist/modifiers/mutate.rb +133 -0
  71. data/lib/wordlist/modifiers/mutate_case.rb +25 -0
  72. data/lib/wordlist/modifiers/sub.rb +97 -0
  73. data/lib/wordlist/modifiers/tr.rb +71 -0
  74. data/lib/wordlist/modifiers/upcase.rb +45 -0
  75. data/lib/wordlist/modifiers.rb +8 -0
  76. data/lib/wordlist/operators/binary_operator.rb +38 -0
  77. data/lib/wordlist/operators/concat.rb +47 -0
  78. data/lib/wordlist/operators/intersect.rb +55 -0
  79. data/lib/wordlist/operators/operator.rb +29 -0
  80. data/lib/wordlist/operators/power.rb +72 -0
  81. data/lib/wordlist/operators/product.rb +50 -0
  82. data/lib/wordlist/operators/subtract.rb +54 -0
  83. data/lib/wordlist/operators/unary_operator.rb +29 -0
  84. data/lib/wordlist/operators/union.rb +61 -0
  85. data/lib/wordlist/operators/unique.rb +52 -0
  86. data/lib/wordlist/operators.rb +7 -0
  87. data/lib/wordlist/unique_filter.rb +40 -61
  88. data/lib/wordlist/version.rb +1 -1
  89. data/lib/wordlist/words.rb +71 -0
  90. data/lib/wordlist.rb +103 -2
  91. data/spec/abstract_list_spec.rb +18 -0
  92. data/spec/builder_spec.rb +220 -76
  93. data/spec/cli_spec.rb +801 -0
  94. data/spec/compression/reader_spec.rb +137 -0
  95. data/spec/compression/writer_spec.rb +194 -0
  96. data/spec/file_spec.rb +258 -0
  97. data/spec/fixtures/wordlist.txt +15 -0
  98. data/spec/fixtures/wordlist.txt.bz2 +0 -0
  99. data/spec/fixtures/wordlist.txt.gz +0 -0
  100. data/spec/fixtures/wordlist.txt.xz +0 -0
  101. data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
  102. data/spec/fixtures/wordlist_with_comments.txt +19 -0
  103. data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
  104. data/spec/format_spec.rb +50 -0
  105. data/spec/helpers/text.rb +3 -3
  106. data/spec/helpers/wordlist.rb +2 -2
  107. data/spec/lexer/lang_spec.rb +70 -0
  108. data/spec/lexer/stop_words_spec.rb +77 -0
  109. data/spec/lexer_spec.rb +652 -0
  110. data/spec/list_methods_spec.rb +181 -0
  111. data/spec/modifiers/capitalize_spec.rb +27 -0
  112. data/spec/modifiers/downcase_spec.rb +27 -0
  113. data/spec/modifiers/gsub_spec.rb +59 -0
  114. data/spec/modifiers/modifier_spec.rb +20 -0
  115. data/spec/modifiers/mutate_case_spec.rb +46 -0
  116. data/spec/modifiers/mutate_spec.rb +39 -0
  117. data/spec/modifiers/sub_spec.rb +98 -0
  118. data/spec/modifiers/tr_spec.rb +46 -0
  119. data/spec/modifiers/upcase_spec.rb +27 -0
  120. data/spec/operators/binary_operator_spec.rb +19 -0
  121. data/spec/operators/concat_spec.rb +26 -0
  122. data/spec/operators/intersect_spec.rb +37 -0
  123. data/spec/operators/operator_spec.rb +16 -0
  124. data/spec/operators/power_spec.rb +57 -0
  125. data/spec/operators/product_spec.rb +39 -0
  126. data/spec/operators/subtract_spec.rb +37 -0
  127. data/spec/operators/union_spec.rb +37 -0
  128. data/spec/operators/unique_spec.rb +25 -0
  129. data/spec/spec_helper.rb +2 -1
  130. data/spec/unique_filter_spec.rb +108 -18
  131. data/spec/wordlist_spec.rb +55 -3
  132. data/spec/words_spec.rb +41 -0
  133. metadata +183 -120
  134. data/lib/wordlist/builders/website.rb +0 -216
  135. data/lib/wordlist/builders.rb +0 -1
  136. data/lib/wordlist/flat_file.rb +0 -47
  137. data/lib/wordlist/list.rb +0 -162
  138. data/lib/wordlist/mutator.rb +0 -113
  139. data/lib/wordlist/parsers.rb +0 -74
  140. data/lib/wordlist/runners/list.rb +0 -116
  141. data/lib/wordlist/runners/runner.rb +0 -67
  142. data/lib/wordlist/runners.rb +0 -2
  143. data/scripts/benchmark +0 -59
  144. data/scripts/text/comedy_of_errors.txt +0 -4011
  145. data/spec/flat_file_spec.rb +0 -25
  146. data/spec/list_spec.rb +0 -58
  147. data/spec/mutator_spec.rb +0 -43
  148. data/spec/parsers_spec.rb +0 -118
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/operators/power'
3
+
4
+ describe Wordlist::Operators::Power do
5
+ let(:wordlist) { %w[foo bar] }
6
+ let(:exponent) { 3 }
7
+
8
+ subject { described_class.new(wordlist,exponent) }
9
+
10
+ let(:expected_words) do
11
+ %w[
12
+ foofoofoo
13
+ foofoobar
14
+ foobarfoo
15
+ foobarbar
16
+ barfoofoo
17
+ barfoobar
18
+ barbarfoo
19
+ barbarbar
20
+ ]
21
+ end
22
+
23
+ describe "#initialize" do
24
+ it "must set #wordlists to a #{Operators::Product} object" do
25
+ expect(subject.wordlists).to be_kind_of(Operators::Product)
26
+ expect(subject.wordlists.left).to eq(wordlist)
27
+ expect(subject.wordlists.right).to be_kind_of(Operators::Product)
28
+ expect(subject.wordlists.right.left).to eq(wordlist)
29
+ expect(subject.wordlists.right.right).to eq(wordlist)
30
+ end
31
+
32
+ context "when the exponent is 1" do
33
+ let(:exponent) { 1 }
34
+
35
+ it "must set #wordlists to the given wordlist" do
36
+ expect(subject.wordlists).to eq(wordlist)
37
+ end
38
+ end
39
+ end
40
+
41
+ describe "#each" do
42
+ context "when given a block" do
43
+ it "must yield words from the left-hand wordlist with the right-hand" do
44
+ expect { |b|
45
+ subject.each(&b)
46
+ }.to yield_successive_args(*expected_words)
47
+ end
48
+ end
49
+
50
+ context "when not given a block" do
51
+ it "must return an Enumerator for the #each" do
52
+ expect(subject.each).to be_kind_of(Enumerator)
53
+ expect(subject.each.to_a).to eq(expected_words)
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/operators/product'
3
+
4
+ describe Wordlist::Operators::Product do
5
+ let(:left) { %w[foo bar baz qux] }
6
+ let(:right) { %w[abc xyz] }
7
+
8
+ subject { described_class.new(left,right) }
9
+
10
+ let(:expected_words) do
11
+ %w[
12
+ fooabc
13
+ fooxyz
14
+ barabc
15
+ barxyz
16
+ bazabc
17
+ bazxyz
18
+ quxabc
19
+ quxxyz
20
+ ]
21
+ end
22
+
23
+ describe "#each" do
24
+ context "when given a block" do
25
+ it "must yield words from the left-hand wordlist with the right-hand" do
26
+ expect { |b|
27
+ subject.each(&b)
28
+ }.to yield_successive_args(*expected_words)
29
+ end
30
+ end
31
+
32
+ context "when not given a block" do
33
+ it "must return an Enumerator for the #each" do
34
+ expect(subject.each).to be_kind_of(Enumerator)
35
+ expect(subject.each.to_a).to eq(expected_words)
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/operators/subtract'
3
+
4
+ describe Wordlist::Operators::Subtract do
5
+ let(:left) { %w[foo bar] }
6
+ let(:right) { %w[bar baz] }
7
+
8
+ subject { described_class.new(left,right) }
9
+
10
+ describe "#each" do
11
+ context "when given a block" do
12
+ it "must yield the words which do not exist in other wordlists" do
13
+ expect { |b|
14
+ subject.each(&b)
15
+ }.to yield_successive_args(*(left - right))
16
+ end
17
+
18
+ context "when the wordlists do not have any common words" do
19
+ let(:left) { %w[foo bar] }
20
+ let(:right) { %w[baz qux] }
21
+
22
+ it "must yield the left-hand operand's words" do
23
+ expect { |b|
24
+ subject.each(&b)
25
+ }.to yield_successive_args(*left)
26
+ end
27
+ end
28
+ end
29
+
30
+ context "when not given a block" do
31
+ it "must return an Enumerator for the #each" do
32
+ expect(subject.each).to be_kind_of(Enumerator)
33
+ expect(subject.each.to_a).to eq(left - right)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/operators/union'
3
+
4
+ describe Wordlist::Operators::Union do
5
+ let(:left) { %w[foo bar] }
6
+ let(:right) { %w[bar baz] }
7
+
8
+ subject { described_class.new(left,right) }
9
+
10
+ describe "#each" do
11
+ context "when given a block" do
12
+ it "must yield the words from both wordlists, without duplicates" do
13
+ expect { |b|
14
+ subject.each(&b)
15
+ }.to yield_successive_args(*(left | right))
16
+ end
17
+
18
+ context "when the wordlists do not have any common words" do
19
+ let(:left) { %w[foo bar] }
20
+ let(:right) { %w[baz qux] }
21
+
22
+ it "must yield words from both wordlists" do
23
+ expect { |b|
24
+ subject.each(&b)
25
+ }.to yield_successive_args(*(left + right))
26
+ end
27
+ end
28
+ end
29
+
30
+ context "when not given a block" do
31
+ it "must return an Enumerator for the #each" do
32
+ expect(subject.each).to be_kind_of(Enumerator)
33
+ expect(subject.each.to_a).to eq(left | right)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/operators/unique'
3
+
4
+ describe Wordlist::Operators::Unique do
5
+ let(:wordlist) { %w[foo bar bar baz foo qux] }
6
+
7
+ subject { described_class.new(wordlist) }
8
+
9
+ describe "#each" do
10
+ context "when given a block" do
11
+ it "must yield the unique words from the wordlist" do
12
+ expect { |b|
13
+ subject.each(&b)
14
+ }.to yield_successive_args(*wordlist.uniq)
15
+ end
16
+ end
17
+
18
+ context "when not given a block" do
19
+ it "must return an Enumerator for the #each" do
20
+ expect(subject.each).to be_kind_of(Enumerator)
21
+ expect(subject.each.to_a).to eq(wordlist.uniq)
22
+ end
23
+ end
24
+ end
25
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,6 @@
1
- gem 'rspec', '~> 2.4'
2
1
  require 'rspec'
2
+ require 'simplecov'
3
+ SimpleCov.start
3
4
 
4
5
  require 'wordlist/version'
5
6
  include Wordlist
@@ -1,34 +1,124 @@
1
+ require 'spec_helper'
1
2
  require 'wordlist/unique_filter'
2
3
 
3
- require 'spec_helper'
4
+ describe Wordlist::UniqueFilter do
5
+ describe "#initialize" do
6
+ it "must initialize #hashes to an empty Set" do
7
+ expect(subject.hashes).to be_kind_of(Set)
8
+ expect(subject.hashes).to be_empty
9
+ end
10
+ end
11
+
12
+ describe "#add" do
13
+ let(:word) { "foo" }
14
+
15
+ before { subject.add(word) }
4
16
 
5
- describe UniqueFilter do
6
- before(:each) do
7
- @filter = UniqueFilter.new
17
+ it "must add the String's hash to #hashes" do
18
+ expect(subject.hashes.include?(word.hash)).to be(true)
19
+ end
20
+
21
+ context "when the same word is added twice" do
22
+ before do
23
+ subject.add(word)
24
+ subject.add(word)
25
+ end
26
+
27
+ it "must add the String's hash to #hashes only once" do
28
+ expect(subject.hashes).to eq(Set[word.hash])
29
+ end
30
+ end
8
31
  end
9
32
 
10
- it "should have seen words" do
11
- @filter.saw!('cat')
33
+ describe "#include?" do
34
+ let(:word) { "foo" }
35
+
36
+ before { subject.add(word) }
37
+
38
+ context "when the unique filter contains the String's hash" do
39
+ it "must return true" do
40
+ expect(subject.include?(word)).to be(true)
41
+ end
42
+ end
12
43
 
13
- @filter.seen?('cat').should == true
14
- @filter.seen?('dog').should == false
44
+ context "when the unique filter does not contain the String's hash" do
45
+ it "must return false" do
46
+ expect(subject.include?("XXX")).to be(false)
47
+ end
48
+ end
15
49
  end
16
50
 
17
- it "should only see a unique word once" do
18
- @filter.saw!('cat').should == true
19
- @filter.saw!('cat').should == false
51
+ describe "#add?" do
52
+ let(:word) { "foo" }
53
+
54
+ before { subject.add(word) }
55
+
56
+ context "when the unique filter contains the String's hash" do
57
+ it "must return false" do
58
+ expect(subject.add?(word)).to be(false)
59
+ end
60
+ end
61
+
62
+ context "when the unique filter does not contain the String's hash" do
63
+ let(:new_word) { "bar" }
64
+
65
+ it "must return true" do
66
+ expect(subject.add?(new_word)).to be(true)
67
+ end
68
+ end
20
69
  end
21
70
 
22
- it "should pass only unique words through the filter" do
23
- input = ['dog', 'cat', 'dog']
24
- output = []
71
+ describe "#empty?" do
72
+ context "when no words have been added to the unique filter" do
73
+ it "must return true" do
74
+ expect(subject.empty?).to be(true)
75
+ end
76
+ end
25
77
 
26
- input.each do |word|
27
- @filter.pass(word) do |result|
28
- output << result
78
+ context "when words have been added to the unique filter" do
79
+ let(:word) { 'foo' }
80
+
81
+ before { subject.add(word) }
82
+
83
+ it "must return false" do
84
+ expect(subject.empty?).to be(false)
29
85
  end
30
86
  end
87
+ end
88
+
89
+ describe "#clear" do
90
+ let(:word1) { 'foo' }
91
+ let(:word2) { 'bar' }
92
+
93
+ before do
94
+ subject.add(word1)
95
+ subject.clear
96
+ subject.add(word2)
97
+ end
98
+
99
+ it "must clear the unique filter of any words" do
100
+ expect(subject.include?(word1)).to be(false)
101
+ expect(subject.include?(word2)).to be(true)
102
+ end
103
+ end
104
+
105
+ describe "#size" do
106
+ it "must return 0 by default" do
107
+ expect(subject.size).to eq(0)
108
+ end
31
109
 
32
- output.should == ['dog', 'cat']
110
+ context "when the unique filter has been populated" do
111
+ let(:words) { %w[foo bar baz] }
112
+
113
+ before do
114
+ words.each do |word|
115
+ subject.add(word)
116
+ end
117
+ end
118
+
119
+ it "must return the number of unique words added to the filter" do
120
+ expect(subject.size).to eq(words.length)
121
+ end
122
+ end
33
123
  end
34
124
  end
@@ -1,9 +1,61 @@
1
- require 'wordlist/version'
2
-
3
1
  require 'spec_helper'
2
+ require 'wordlist'
4
3
 
5
4
  describe Wordlist do
6
5
  it "should have a VERSION constant" do
7
- Wordlist.const_defined?('VERSION').should == true
6
+ expect(Wordlist.const_defined?('VERSION')).to be(true)
7
+ end
8
+
9
+ describe ".[]" do
10
+ let(:words) { %w[foo bar baz] }
11
+
12
+ subject { described_class[*words] }
13
+
14
+ it "must return a Wordlist::Words object with the given words" do
15
+ expect(subject).to be_kind_of(Wordlist::Words)
16
+ expect(subject.words).to eq(words)
17
+ end
18
+ end
19
+
20
+ let(:fixtures_dir) { ::File.join(__dir__,'fixtures') }
21
+
22
+ describe ".open" do
23
+ let(:path) { ::File.join(fixtures_dir,'wordlist.txt') }
24
+
25
+ subject { described_class.open(path) }
26
+
27
+ it "must return a Wordlist::File object using the given path" do
28
+ expect(subject).to be_kind_of(Wordlist::File)
29
+ expect(subject.path).to eq(path)
30
+ end
31
+
32
+ context "when given a block" do
33
+ it "must yield the Wordlist::File object" do
34
+ expect { |b|
35
+ described_class.open(path,&b)
36
+ }.to yield_with_args(Wordlist::File)
37
+ end
38
+ end
39
+ end
40
+
41
+ describe ".build" do
42
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.txt') }
43
+
44
+ subject { described_class.build(path) }
45
+
46
+ it "must return a Wordlist::Builder object using the given path" do
47
+ expect(subject).to be_kind_of(Wordlist::Builder)
48
+ expect(subject.path).to eq(path)
49
+ end
50
+
51
+ context "when given a block" do
52
+ it "must yield the Wordlist::Builder object" do
53
+ expect { |b|
54
+ described_class.build(path,&b)
55
+ }.to yield_with_args(Wordlist::Builder)
56
+ end
57
+ end
58
+
59
+ after { ::FileUtils.rm_f(path) }
8
60
  end
9
61
  end
@@ -0,0 +1,41 @@
1
+ require 'spec_helper'
2
+ require 'wordlist/words'
3
+
4
+ describe Wordlist::Words do
5
+ let(:words) { %w[foo bar baz] }
6
+
7
+ subject { described_class.new(words) }
8
+
9
+ describe "#initialize" do
10
+ it "must set #words" do
11
+ expect(subject.words).to eq(words)
12
+ end
13
+ end
14
+
15
+ describe ".[]" do
16
+ subject { described_class[*words] }
17
+
18
+ it "must return a new #{described_class}" do
19
+ expect(subject).to be_kind_of(described_class)
20
+ end
21
+
22
+ it "must initialize the wordlist with the given words" do
23
+ expect(subject.words).to eq(words)
24
+ end
25
+ end
26
+
27
+ describe "#each" do
28
+ context "when a block is given" do
29
+ it "must yield each word" do
30
+ expect { |b| subject.each(&b) }.to yield_successive_args(*words)
31
+ end
32
+ end
33
+
34
+ context "when no block is given" do
35
+ it "must return an Enumerator for the words" do
36
+ expect(subject.each).to be_kind_of(Enumerator)
37
+ expect(subject.each.to_a).to eq(words)
38
+ end
39
+ end
40
+ end
41
+ end