wordlist 0.1.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (152)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +28 -0
  3. data/.gitignore +6 -3
  4. data/ChangeLog.md +55 -1
  5. data/Gemfile +15 -0
  6. data/LICENSE.txt +1 -3
  7. data/README.md +301 -60
  8. data/Rakefile +7 -32
  9. data/benchmarks.rb +115 -0
  10. data/bin/wordlist +4 -7
  11. data/data/stop_words/ar.txt +104 -0
  12. data/data/stop_words/bg.txt +259 -0
  13. data/data/stop_words/bn.txt +363 -0
  14. data/data/stop_words/ca.txt +126 -0
  15. data/data/stop_words/cs.txt +138 -0
  16. data/data/stop_words/da.txt +101 -0
  17. data/data/stop_words/de.txt +129 -0
  18. data/data/stop_words/el.txt +79 -0
  19. data/data/stop_words/en.txt +175 -0
  20. data/data/stop_words/es.txt +178 -0
  21. data/data/stop_words/eu.txt +98 -0
  22. data/data/stop_words/fa.txt +332 -0
  23. data/data/stop_words/fi.txt +747 -0
  24. data/data/stop_words/fr.txt +116 -0
  25. data/data/stop_words/ga.txt +109 -0
  26. data/data/stop_words/gl.txt +160 -0
  27. data/data/stop_words/he.txt +499 -0
  28. data/data/stop_words/hi.txt +97 -0
  29. data/data/stop_words/hr.txt +179 -0
  30. data/data/stop_words/hu.txt +35 -0
  31. data/data/stop_words/hy.txt +45 -0
  32. data/data/stop_words/id.txt +357 -0
  33. data/data/stop_words/it.txt +134 -0
  34. data/data/stop_words/ja.txt +44 -0
  35. data/data/stop_words/ko.txt +677 -0
  36. data/data/stop_words/ku.txt +63 -0
  37. data/data/stop_words/lt.txt +507 -0
  38. data/data/stop_words/lv.txt +163 -0
  39. data/data/stop_words/mr.txt +99 -0
  40. data/data/stop_words/nl.txt +48 -0
  41. data/data/stop_words/no.txt +172 -0
  42. data/data/stop_words/pl.txt +138 -0
  43. data/data/stop_words/pt.txt +147 -0
  44. data/data/stop_words/ro.txt +281 -0
  45. data/data/stop_words/ru.txt +421 -0
  46. data/data/stop_words/sk.txt +173 -0
  47. data/data/stop_words/sv.txt +386 -0
  48. data/data/stop_words/th.txt +115 -0
  49. data/data/stop_words/tr.txt +114 -0
  50. data/data/stop_words/uk.txt +28 -0
  51. data/data/stop_words/ur.txt +513 -0
  52. data/data/stop_words/zh.txt +125 -0
  53. data/gemspec.yml +13 -12
  54. data/lib/wordlist/abstract_wordlist.rb +25 -0
  55. data/lib/wordlist/builder.rb +172 -138
  56. data/lib/wordlist/cli.rb +459 -0
  57. data/lib/wordlist/compression/reader.rb +72 -0
  58. data/lib/wordlist/compression/writer.rb +80 -0
  59. data/lib/wordlist/exceptions.rb +31 -0
  60. data/lib/wordlist/file.rb +177 -0
  61. data/lib/wordlist/format.rb +39 -0
  62. data/lib/wordlist/lexer/lang.rb +34 -0
  63. data/lib/wordlist/lexer/stop_words.rb +69 -0
  64. data/lib/wordlist/lexer.rb +221 -0
  65. data/lib/wordlist/list_methods.rb +462 -0
  66. data/lib/wordlist/modifiers/capitalize.rb +46 -0
  67. data/lib/wordlist/modifiers/downcase.rb +46 -0
  68. data/lib/wordlist/modifiers/gsub.rb +52 -0
  69. data/lib/wordlist/modifiers/modifier.rb +44 -0
  70. data/lib/wordlist/modifiers/mutate.rb +134 -0
  71. data/lib/wordlist/modifiers/mutate_case.rb +26 -0
  72. data/lib/wordlist/modifiers/sub.rb +98 -0
  73. data/lib/wordlist/modifiers/tr.rb +72 -0
  74. data/lib/wordlist/modifiers/upcase.rb +46 -0
  75. data/lib/wordlist/modifiers.rb +9 -0
  76. data/lib/wordlist/operators/binary_operator.rb +39 -0
  77. data/lib/wordlist/operators/concat.rb +48 -0
  78. data/lib/wordlist/operators/intersect.rb +56 -0
  79. data/lib/wordlist/operators/operator.rb +29 -0
  80. data/lib/wordlist/operators/power.rb +73 -0
  81. data/lib/wordlist/operators/product.rb +51 -0
  82. data/lib/wordlist/operators/subtract.rb +55 -0
  83. data/lib/wordlist/operators/unary_operator.rb +30 -0
  84. data/lib/wordlist/operators/union.rb +62 -0
  85. data/lib/wordlist/operators/unique.rb +53 -0
  86. data/lib/wordlist/operators.rb +8 -0
  87. data/lib/wordlist/unique_filter.rb +41 -61
  88. data/lib/wordlist/version.rb +4 -2
  89. data/lib/wordlist/words.rb +72 -0
  90. data/lib/wordlist.rb +104 -2
  91. data/spec/abstract_list_spec.rb +18 -0
  92. data/spec/builder_spec.rb +220 -76
  93. data/spec/cli_spec.rb +802 -0
  94. data/spec/compression/reader_spec.rb +137 -0
  95. data/spec/compression/writer_spec.rb +194 -0
  96. data/spec/file_spec.rb +269 -0
  97. data/spec/fixtures/wordlist.txt +15 -0
  98. data/spec/fixtures/wordlist.txt.bz2 +0 -0
  99. data/spec/fixtures/wordlist.txt.gz +0 -0
  100. data/spec/fixtures/wordlist.txt.xz +0 -0
  101. data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
  102. data/spec/fixtures/wordlist_with_comments.txt +19 -0
  103. data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
  104. data/spec/format_spec.rb +50 -0
  105. data/spec/helpers/text.rb +3 -3
  106. data/spec/helpers/wordlist.rb +2 -2
  107. data/spec/lexer/lang_spec.rb +70 -0
  108. data/spec/lexer/stop_words_spec.rb +77 -0
  109. data/spec/lexer_spec.rb +718 -0
  110. data/spec/list_methods_spec.rb +181 -0
  111. data/spec/modifiers/capitalize_spec.rb +27 -0
  112. data/spec/modifiers/downcase_spec.rb +27 -0
  113. data/spec/modifiers/gsub_spec.rb +59 -0
  114. data/spec/modifiers/modifier_spec.rb +20 -0
  115. data/spec/modifiers/mutate_case_spec.rb +46 -0
  116. data/spec/modifiers/mutate_spec.rb +39 -0
  117. data/spec/modifiers/sub_spec.rb +98 -0
  118. data/spec/modifiers/tr_spec.rb +46 -0
  119. data/spec/modifiers/upcase_spec.rb +27 -0
  120. data/spec/operators/binary_operator_spec.rb +19 -0
  121. data/spec/operators/concat_spec.rb +26 -0
  122. data/spec/operators/intersect_spec.rb +37 -0
  123. data/spec/operators/operator_spec.rb +16 -0
  124. data/spec/operators/power_spec.rb +57 -0
  125. data/spec/operators/product_spec.rb +39 -0
  126. data/spec/operators/subtract_spec.rb +37 -0
  127. data/spec/operators/unary_operator_spec.rb +14 -0
  128. data/spec/operators/union_spec.rb +37 -0
  129. data/spec/operators/unique_spec.rb +25 -0
  130. data/spec/spec_helper.rb +2 -1
  131. data/spec/unique_filter_spec.rb +108 -18
  132. data/spec/wordlist_spec.rb +55 -3
  133. data/spec/words_spec.rb +41 -0
  134. data/wordlist.gemspec +1 -0
  135. metadata +164 -126
  136. data/lib/wordlist/builders/website.rb +0 -216
  137. data/lib/wordlist/builders.rb +0 -1
  138. data/lib/wordlist/flat_file.rb +0 -47
  139. data/lib/wordlist/list.rb +0 -162
  140. data/lib/wordlist/mutator.rb +0 -113
  141. data/lib/wordlist/parsers.rb +0 -74
  142. data/lib/wordlist/runners/list.rb +0 -116
  143. data/lib/wordlist/runners/runner.rb +0 -67
  144. data/lib/wordlist/runners.rb +0 -2
  145. data/scripts/benchmark +0 -59
  146. data/scripts/text/comedy_of_errors.txt +0 -4011
  147. data/spec/classes/parser_class.rb +0 -7
  148. data/spec/classes/test_list.rb +0 -9
  149. data/spec/flat_file_spec.rb +0 -25
  150. data/spec/list_spec.rb +0 -58
  151. data/spec/mutator_spec.rb +0 -43
  152. data/spec/parsers_spec.rb +0 -118
data/spec/builder_spec.rb CHANGED
@@ -1,127 +1,271 @@
1
+ require 'spec_helper'
1
2
  require 'wordlist/builder'
2
3
 
3
- require 'spec_helper'
4
- require 'helpers/text'
5
- require 'helpers/wordlist'
6
- require 'builder_examples'
4
+ require 'fileutils'
5
+
6
+ describe Wordlist::Builder do
7
+ let(:fixtures_dir) { ::File.join(__dir__,'fixtures') }
8
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.txt') }
9
+
10
+ subject { described_class.new(path) }
11
+
12
+ describe "#initialize" do
13
+ it "must initialize the #path" do
14
+ expect(subject.path).to eq(path)
15
+ end
7
16
 
8
- describe Builder do
9
- include Helpers
17
+ context "when the path ends in '.txt'" do
18
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.txt') }
10
19
 
11
- describe "new wordlist" do
12
- before(:all) do
13
- @expected = ['dog', 'cat', 'catx', 'dat']
20
+ it "must default #format to :txt" do
21
+ expect(subject.format).to eq(:txt)
22
+ end
14
23
  end
15
24
 
16
- before(:each) do
17
- @path = wordlist_tempfile
25
+ context "when the path ends in '.gz'" do
26
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.gz') }
27
+
28
+ it "must default #format to :gzip" do
29
+ expect(subject.format).to eq(:gzip)
30
+ end
18
31
  end
19
32
 
20
- it_should_behave_like "a wordlist Builder"
21
- end
33
+ context "when the path ends in '.bz2'" do
34
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.bz2') }
22
35
 
23
- describe "existing wordlist" do
24
- before(:all) do
25
- @path = '/tmp/bla'
26
- @expected = ['dog', 'cat', 'log', 'catx', 'dat']
36
+ it "must default #format to :bzip2" do
37
+ expect(subject.format).to eq(:bzip2)
38
+ end
39
+ end
40
+
41
+ context "when the path ends in '.xz'" do
42
+ let(:path) { ::File.join(fixtures_dir,'new_wordlist.xz') }
43
+
44
+ it "must default #format to :xz" do
45
+ expect(subject.format).to eq(:xz)
46
+ end
27
47
  end
28
48
 
29
- before(:each) do
30
- @path = wordlist_tempfile(Helpers::PREVIOUS_WORDLIST)
49
+ context "when format: :txt is given" do
50
+ subject { described_class.new(path, format: :txt) }
51
+
52
+ it "must set #format to :txt" do
53
+ expect(subject.format).to eq(:txt)
54
+ end
31
55
  end
32
56
 
33
- it_should_behave_like "a wordlist Builder"
57
+ context "when format: :gzip is given" do
58
+ subject { described_class.new(path, format: :gzip) }
59
+
60
+ it "must set #format to :gzip" do
61
+ expect(subject.format).to eq(:gzip)
62
+ end
63
+ end
64
+
65
+ context "when format: :bzip2 is given" do
66
+ subject { described_class.new(path, format: :bzip2) }
67
+
68
+ it "must set #format to :bzip2" do
69
+ expect(subject.format).to eq(:bzip2)
70
+ end
71
+ end
72
+
73
+ context "when format: :xz is given" do
74
+ subject { described_class.new(path, format: :xz) }
75
+
76
+ it "must set #format to :xz" do
77
+ expect(subject.format).to eq(:xz)
78
+ end
79
+ end
80
+
81
+ it "must default #append? to false" do
82
+ expect(subject.append?).to be(false)
83
+ end
84
+
85
+ it "#unique_filter must be empty" do
86
+ expect(subject.unique_filter).to be_empty
87
+ end
88
+
89
+ it "must open the wordlist file" do
90
+ expect(subject).to_not be_closed
91
+ end
92
+
93
+ context "when given append: true" do
94
+ context "and the wordlist file already exists" do
95
+ let(:path) { ::File.join(fixtures_dir,'pre_existing_wordlist.txt') }
96
+ let(:pre_existing_words) { %w[foo bar] }
97
+
98
+ subject { described_class.new(path, append: true) }
99
+
100
+ before do
101
+ ::File.open(path,'w') do |file|
102
+ pre_existing_words.each do |word|
103
+ file.puts word
104
+ end
105
+ end
106
+ end
107
+
108
+ it "must add the pre-existing words to the #unique_filter" do
109
+ expect(pre_existing_words.all? { |word|
110
+ subject.unique_filter.include?(word)
111
+ }).to be(true)
112
+ end
113
+
114
+ after { ::FileUtils.rm_f(path) }
115
+ end
116
+ end
34
117
  end
35
118
 
36
- describe "word queue" do
37
- before(:all) do
38
- @path = wordlist_tempfile
119
+ describe "#lexer" do
120
+ it "must be a Lexer" do
121
+ expect(subject.lexer).to be_kind_of(Wordlist::Lexer)
39
122
  end
123
+ end
40
124
 
41
- before(:each) do
42
- @builder = Builder.new(@path, :max_words => 2)
125
+ describe "#unique_filter" do
126
+ it "must be a UniqueFilter" do
127
+ expect(subject.unique_filter).to be_kind_of(Wordlist::UniqueFilter)
43
128
  end
129
+ end
130
+
131
+ let(:added_words) { ::File.readlines(path).map(&:chomp) }
44
132
 
45
- it "should act like a queue" do
46
- @builder.enqueue('dog')
47
- @builder.enqueue('cat')
133
+ before { ::FileUtils.rm_f(path) }
48
134
 
49
- @builder.word_queue.should == ['dog', 'cat']
135
+ describe "#add" do
136
+ let(:word) { 'foo' }
137
+
138
+ before do
139
+ described_class.open(path) do |builder|
140
+ builder.add(word)
141
+ end
50
142
  end
51
143
 
52
- it "should have a maximum length of the queue" do
53
- @builder.enqueue('dog')
54
- @builder.enqueue('cat')
55
- @builder.enqueue('log')
144
+ it "must add the word to the file" do
145
+ expect(added_words).to eq([word])
146
+ end
147
+
148
+ context "when the same word is added multiple times" do
149
+ before do
150
+ described_class.open(path) do |builder|
151
+ builder.add(word)
152
+ builder.add(word)
153
+ end
154
+ end
56
155
 
57
- @builder.word_queue.should == ['cat', 'log']
156
+ it "must filter out duplicate words" do
157
+ expect(File.readlines(path).map(&:chomp)).to eq([word])
158
+ end
58
159
  end
59
160
  end
60
161
 
61
- describe "word combinations" do
62
- before(:all) do
63
- @path = wordlist_tempfile
162
+ describe "#append" do
163
+ let(:words) { %w[foo bar baz] }
164
+
165
+ before do
166
+ described_class.open(path) do |builder|
167
+ builder.append(words)
168
+ end
169
+ end
170
+
171
+ it "must add the words to the file" do
172
+ expect(added_words).to eq(words)
64
173
  end
65
174
 
66
- it "should yield only one word when max_words is set to 1" do
67
- builder = Builder.new(@path)
68
- builder.enqueue('dog')
175
+ context "when there are duplicate words in the given Array" do
176
+ let(:words) { %w[foo bar bar baz] }
69
177
 
70
- builder.word_combinations do |words|
71
- words.should == 'dog'
178
+ it "must filter out duplicate words" do
179
+ expect(added_words).to eq(words.uniq)
72
180
  end
73
181
  end
182
+ end
74
183
 
75
- it "should include the last seen word in every combination" do
76
- builder = Builder.new(@path, :max_words => 2)
77
- builder.enqueue('dog')
78
- builder.enqueue('cat')
79
- builder.enqueue('dat')
184
+ describe "#parse" do
185
+ let(:words) { %w[foo bar baz] }
186
+ let(:text) { "foo bar, baz." }
80
187
 
81
- builder.word_combinations do |words|
82
- words.split(' ').include?('dat').should == true
188
+ before do
189
+ described_class.open(path) do |builder|
190
+ builder.parse(text)
83
191
  end
84
192
  end
85
193
 
86
- it "should include a minimum number of words" do
87
- builder = Builder.new(@path, :min_words => 2, :max_words => 3)
88
- builder.enqueue('dog')
89
- builder.enqueue('cat')
90
- builder.enqueue('dat')
194
+ it "must parse the text into words and add them to the file" do
195
+ expect(added_words).to eq(words)
196
+ end
197
+
198
+ context "when the text contains duplicate words" do
199
+ let(:text) { "foo bar bar, baz baz." }
91
200
 
92
- builder.word_combinations do |words|
93
- words.split(' ').length.should >= 2
201
+ it "must filter out duplicate words" do
202
+ expect(added_words).to eq(words)
94
203
  end
95
204
  end
205
+ end
206
+
207
+ describe "#parse_file" do
208
+ let(:text_file) { ::File.join(fixtures_dir,'text_file.txt') }
96
209
 
97
- it "should not include more than a maximum number of words" do
98
- builder = Builder.new(@path, :max_words => 2)
99
- builder.enqueue('dog')
100
- builder.enqueue('cat')
101
- builder.enqueue('dat')
210
+ let(:words) { %w[foo bar baz] }
211
+ let(:text) { "foo bar, baz." }
102
212
 
103
- builder.word_combinations do |words|
104
- words.split(' ').length.should_not > 2
213
+ before do
214
+ ::File.write(text_file,text)
215
+
216
+ described_class.open(path) do |builder|
217
+ builder.parse(text)
105
218
  end
106
219
  end
107
220
 
108
- it "should preserve the order words were seen in" do
109
- builder = Builder.new(@path, :max_words => 3)
110
- builder.enqueue('dog')
111
- builder.enqueue('cat')
112
- builder.enqueue('dat')
221
+ it "must parse the text file into words and add them to the file" do
222
+ expect(added_words).to eq(words)
223
+ end
113
224
 
114
- combinations = []
225
+ context "when the text file contains duplicate words" do
226
+ let(:text) { "foo bar bar, baz baz." }
115
227
 
116
- builder.word_combinations do |words|
117
- combinations << words
228
+ it "must filter out duplicate words" do
229
+ expect(added_words).to eq(words)
118
230
  end
231
+ end
232
+
233
+ after { ::FileUtils.rm_f(text_file) }
234
+ end
235
+
236
+ describe "#close" do
237
+ let(:word) { 'foo' }
238
+
239
+ it "must close the wordlist file" do
240
+ expect(::File.file?(path)).to be(false)
241
+
242
+ subject.add(word)
243
+ subject.close
244
+
245
+ expect(::File.file?(path)).to be(true)
246
+ expect(::File.size(path)).to be > 0
247
+ end
119
248
 
120
- combinations.should == [
121
- 'dat',
122
- 'cat dat',
123
- 'dog cat dat'
124
- ]
249
+ it "must clear the unique filter" do
250
+ expect(subject.unique_filter).to be_empty
125
251
  end
126
252
  end
253
+
254
+ describe "#closed?" do
255
+ context "when the builder was been initialized" do
256
+ it "must return false" do
257
+ expect(subject.closed?).to be(false)
258
+ end
259
+ end
260
+
261
+ context "when #close has been called" do
262
+ before { subject.close }
263
+
264
+ it "must return true" do
265
+ expect(subject.closed?).to be(true)
266
+ end
267
+ end
268
+ end
269
+
270
+ after { ::FileUtils.rm_f(path) }
127
271
  end