wordlist 0.1.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (152) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +28 -0
  3. data/.gitignore +6 -3
  4. data/ChangeLog.md +55 -1
  5. data/Gemfile +15 -0
  6. data/LICENSE.txt +1 -3
  7. data/README.md +301 -60
  8. data/Rakefile +7 -32
  9. data/benchmarks.rb +115 -0
  10. data/bin/wordlist +4 -7
  11. data/data/stop_words/ar.txt +104 -0
  12. data/data/stop_words/bg.txt +259 -0
  13. data/data/stop_words/bn.txt +363 -0
  14. data/data/stop_words/ca.txt +126 -0
  15. data/data/stop_words/cs.txt +138 -0
  16. data/data/stop_words/da.txt +101 -0
  17. data/data/stop_words/de.txt +129 -0
  18. data/data/stop_words/el.txt +79 -0
  19. data/data/stop_words/en.txt +175 -0
  20. data/data/stop_words/es.txt +178 -0
  21. data/data/stop_words/eu.txt +98 -0
  22. data/data/stop_words/fa.txt +332 -0
  23. data/data/stop_words/fi.txt +747 -0
  24. data/data/stop_words/fr.txt +116 -0
  25. data/data/stop_words/ga.txt +109 -0
  26. data/data/stop_words/gl.txt +160 -0
  27. data/data/stop_words/he.txt +499 -0
  28. data/data/stop_words/hi.txt +97 -0
  29. data/data/stop_words/hr.txt +179 -0
  30. data/data/stop_words/hu.txt +35 -0
  31. data/data/stop_words/hy.txt +45 -0
  32. data/data/stop_words/id.txt +357 -0
  33. data/data/stop_words/it.txt +134 -0
  34. data/data/stop_words/ja.txt +44 -0
  35. data/data/stop_words/ko.txt +677 -0
  36. data/data/stop_words/ku.txt +63 -0
  37. data/data/stop_words/lt.txt +507 -0
  38. data/data/stop_words/lv.txt +163 -0
  39. data/data/stop_words/mr.txt +99 -0
  40. data/data/stop_words/nl.txt +48 -0
  41. data/data/stop_words/no.txt +172 -0
  42. data/data/stop_words/pl.txt +138 -0
  43. data/data/stop_words/pt.txt +147 -0
  44. data/data/stop_words/ro.txt +281 -0
  45. data/data/stop_words/ru.txt +421 -0
  46. data/data/stop_words/sk.txt +173 -0
  47. data/data/stop_words/sv.txt +386 -0
  48. data/data/stop_words/th.txt +115 -0
  49. data/data/stop_words/tr.txt +114 -0
  50. data/data/stop_words/uk.txt +28 -0
  51. data/data/stop_words/ur.txt +513 -0
  52. data/data/stop_words/zh.txt +125 -0
  53. data/gemspec.yml +13 -12
  54. data/lib/wordlist/abstract_wordlist.rb +25 -0
  55. data/lib/wordlist/builder.rb +172 -138
  56. data/lib/wordlist/cli.rb +459 -0
  57. data/lib/wordlist/compression/reader.rb +72 -0
  58. data/lib/wordlist/compression/writer.rb +80 -0
  59. data/lib/wordlist/exceptions.rb +31 -0
  60. data/lib/wordlist/file.rb +177 -0
  61. data/lib/wordlist/format.rb +39 -0
  62. data/lib/wordlist/lexer/lang.rb +34 -0
  63. data/lib/wordlist/lexer/stop_words.rb +69 -0
  64. data/lib/wordlist/lexer.rb +221 -0
  65. data/lib/wordlist/list_methods.rb +462 -0
  66. data/lib/wordlist/modifiers/capitalize.rb +46 -0
  67. data/lib/wordlist/modifiers/downcase.rb +46 -0
  68. data/lib/wordlist/modifiers/gsub.rb +52 -0
  69. data/lib/wordlist/modifiers/modifier.rb +44 -0
  70. data/lib/wordlist/modifiers/mutate.rb +134 -0
  71. data/lib/wordlist/modifiers/mutate_case.rb +26 -0
  72. data/lib/wordlist/modifiers/sub.rb +98 -0
  73. data/lib/wordlist/modifiers/tr.rb +72 -0
  74. data/lib/wordlist/modifiers/upcase.rb +46 -0
  75. data/lib/wordlist/modifiers.rb +9 -0
  76. data/lib/wordlist/operators/binary_operator.rb +39 -0
  77. data/lib/wordlist/operators/concat.rb +48 -0
  78. data/lib/wordlist/operators/intersect.rb +56 -0
  79. data/lib/wordlist/operators/operator.rb +29 -0
  80. data/lib/wordlist/operators/power.rb +73 -0
  81. data/lib/wordlist/operators/product.rb +51 -0
  82. data/lib/wordlist/operators/subtract.rb +55 -0
  83. data/lib/wordlist/operators/unary_operator.rb +30 -0
  84. data/lib/wordlist/operators/union.rb +62 -0
  85. data/lib/wordlist/operators/unique.rb +53 -0
  86. data/lib/wordlist/operators.rb +8 -0
  87. data/lib/wordlist/unique_filter.rb +41 -61
  88. data/lib/wordlist/version.rb +4 -2
  89. data/lib/wordlist/words.rb +72 -0
  90. data/lib/wordlist.rb +104 -2
  91. data/spec/abstract_list_spec.rb +18 -0
  92. data/spec/builder_spec.rb +220 -76
  93. data/spec/cli_spec.rb +802 -0
  94. data/spec/compression/reader_spec.rb +137 -0
  95. data/spec/compression/writer_spec.rb +194 -0
  96. data/spec/file_spec.rb +269 -0
  97. data/spec/fixtures/wordlist.txt +15 -0
  98. data/spec/fixtures/wordlist.txt.bz2 +0 -0
  99. data/spec/fixtures/wordlist.txt.gz +0 -0
  100. data/spec/fixtures/wordlist.txt.xz +0 -0
  101. data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
  102. data/spec/fixtures/wordlist_with_comments.txt +19 -0
  103. data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
  104. data/spec/format_spec.rb +50 -0
  105. data/spec/helpers/text.rb +3 -3
  106. data/spec/helpers/wordlist.rb +2 -2
  107. data/spec/lexer/lang_spec.rb +70 -0
  108. data/spec/lexer/stop_words_spec.rb +77 -0
  109. data/spec/lexer_spec.rb +718 -0
  110. data/spec/list_methods_spec.rb +181 -0
  111. data/spec/modifiers/capitalize_spec.rb +27 -0
  112. data/spec/modifiers/downcase_spec.rb +27 -0
  113. data/spec/modifiers/gsub_spec.rb +59 -0
  114. data/spec/modifiers/modifier_spec.rb +20 -0
  115. data/spec/modifiers/mutate_case_spec.rb +46 -0
  116. data/spec/modifiers/mutate_spec.rb +39 -0
  117. data/spec/modifiers/sub_spec.rb +98 -0
  118. data/spec/modifiers/tr_spec.rb +46 -0
  119. data/spec/modifiers/upcase_spec.rb +27 -0
  120. data/spec/operators/binary_operator_spec.rb +19 -0
  121. data/spec/operators/concat_spec.rb +26 -0
  122. data/spec/operators/intersect_spec.rb +37 -0
  123. data/spec/operators/operator_spec.rb +16 -0
  124. data/spec/operators/power_spec.rb +57 -0
  125. data/spec/operators/product_spec.rb +39 -0
  126. data/spec/operators/subtract_spec.rb +37 -0
  127. data/spec/operators/unary_operator_spec.rb +14 -0
  128. data/spec/operators/union_spec.rb +37 -0
  129. data/spec/operators/unique_spec.rb +25 -0
  130. data/spec/spec_helper.rb +2 -1
  131. data/spec/unique_filter_spec.rb +108 -18
  132. data/spec/wordlist_spec.rb +55 -3
  133. data/spec/words_spec.rb +41 -0
  134. data/wordlist.gemspec +1 -0
  135. metadata +164 -126
  136. data/lib/wordlist/builders/website.rb +0 -216
  137. data/lib/wordlist/builders.rb +0 -1
  138. data/lib/wordlist/flat_file.rb +0 -47
  139. data/lib/wordlist/list.rb +0 -162
  140. data/lib/wordlist/mutator.rb +0 -113
  141. data/lib/wordlist/parsers.rb +0 -74
  142. data/lib/wordlist/runners/list.rb +0 -116
  143. data/lib/wordlist/runners/runner.rb +0 -67
  144. data/lib/wordlist/runners.rb +0 -2
  145. data/scripts/benchmark +0 -59
  146. data/scripts/text/comedy_of_errors.txt +0 -4011
  147. data/spec/classes/parser_class.rb +0 -7
  148. data/spec/classes/test_list.rb +0 -9
  149. data/spec/flat_file_spec.rb +0 -25
  150. data/spec/list_spec.rb +0 -58
  151. data/spec/mutator_spec.rb +0 -43
  152. data/spec/parsers_spec.rb +0 -118
@@ -0,0 +1,459 @@
1
+ # frozen_string_literal: true
2
+ require 'wordlist/file'
3
+ require 'wordlist/builder'
4
+ require 'wordlist/version'
5
+
6
+ require 'optparse'
7
+
8
+ module Wordlist
9
+ #
10
+ # Represents the `wordlist` command's logic.
11
+ #
12
+ # @api private
13
+ #
14
+ # @since 1.0.0
15
+ #
16
+ class CLI
17
+
# The program name.
PROGRAM_NAME = "wordlist"

# The URL to report bugs to.
BUG_REPORT_URL = "https://github.com/postmodern/wordlist.rb/issues/new"

# Mapping of `--format` option values and `format:` Symbols.
#
# Frozen so that the lookup table handed to the option parser cannot be
# modified at runtime.
FORMATS = {
  'txt'   => :txt,
  'gzip'  => :gzip,
  'bzip2' => :bzip2,
  'xz'    => :xz
}.freeze
31
+
32
+ # The command's option parser.
33
+ #
34
+ # @return [OptionParser]
35
+ attr_reader :option_parser
36
+
37
+ # Command mode (building or reading).
38
+ #
39
+ # @return [:build, :read]
40
+ attr_reader :mode
41
+
42
+ # The explicit wordlist format to use.
43
+ #
44
+ # @return [:txt, :gzip, :bzip2, :xz, nil]
45
+ attr_reader :format
46
+
47
+ # The path to the output wordlist file.
48
+ #
49
+ # @return [String, nil]
50
+ attr_reader :output
51
+
52
+ # The command to run with each word from the wordlist.
53
+ #
54
+ # @return [String, nil]
55
+ attr_reader :command
56
+
57
+ # Wordlist operators to apply.
58
+ #
59
+ # @return [Array<(Symbol, ...)>]
60
+ attr_reader :operators
61
+
62
+ # Wordlist modifiers to apply.
63
+ #
64
+ # @return [Array<(Symbol, ...)>]
65
+ attr_reader :modifiers
66
+
67
+ # Additional options for {Builder#initialize}.
68
+ #
69
+ # @return [Hash{Symbol => Object}]
70
+ attr_reader :builder_options
71
+
#
# Initializes the command.
#
# @param [:read, :build] mode
#   The initial command mode.
#
# @param [:txt, :gzip, :bzip2, :xz, nil] format
#   The explicit wordlist format, or `nil` when none was requested.
#
# @param [String, nil] command
#   The command to run with each word, if any.
#
def initialize(mode: :read, format: nil, command: nil)
  # state that the option callbacks may later overwrite
  @mode    = mode
  @format  = format
  @command = command
  @output  = nil

  # queues filled in by #add_operator and #add_modifier
  @operators = []
  @modifiers = []

  @builder_options = {}

  # building the parser only registers callbacks; none of them run until
  # the parser is asked to parse arguments
  @option_parser = option_parser
end
94
+
#
# Queues an operator to be applied to the wordlist(s) later.
#
# @param [Symbol] name
#   The operator method name.
#
# @param [Array<Object>] args
#   Additional arguments for the operator.
#
# @return [Array]
#   The updated list of queued `[name, args]` pairs.
#
def add_operator(name,*args)
  entry = [name, args]

  @operators.push(entry)
end
107
+
#
# Queues a modifier to be applied to the wordlist(s) later.
#
# @param [Symbol] name
#   The modifier method name.
#
# @param [Array<Object>] args
#   Additional arguments for the modifier.
#
# @return [Array]
#   The updated list of queued `[name, args]` pairs.
#
def add_modifier(name,*args)
  entry = [name, args]

  @modifiers.push(entry)
end
120
+
#
# Opens a wordlist file.
#
# The explicit format is only passed along when one was set; otherwise
# `Wordlist::File.open` is called with the path alone.
#
# If the wordlist cannot be found or its format is unknown, the error
# message is printed and the process exits with status `-1`.
#
# @param [String] path
#   The path to the wordlist file.
#
# @return [Wordlist::File]
#   The opened wordlist.
#
def open_wordlist(path)
  return Wordlist::File.open(path) unless @format

  Wordlist::File.open(path, format: @format)
rescue WordlistNotFound, UnknownFormat => error
  print_error(error.message)
  exit(-1)
end
140
+
#
# Initializes and runs the command.
#
# @param [Array<String>] argv
#   Command-line arguments.
#
# @return [Integer]
#   The exit status of the command.
#
def self.run(argv=ARGV)
  begin
    new.run(argv)
  rescue Interrupt
    # interrupted by the user (Ctrl^C), see
    # https://tldp.org/LDP/abs/html/exitcodes.html
    130
  rescue Errno::EPIPE
    # STDOUT pipe broken
    0
  end
end
159
+
#
# Runs the command.
#
# Parses the options, then dispatches to {#build_mode} or {#read_mode}
# with the remaining arguments. Option parsing errors are printed as
# plain error messages; any other `StandardError` is printed as a bug
# report.
#
# @param [Array<String>] argv
#   Command-line arguments.
#
# @return [Integer]
#   The return status code.
#
def run(argv=ARGV)
  begin
    argv = @option_parser.parse(argv)
  rescue OptionParser::ParseError => error
    print_error(error.message)
    return -1
  end

  if @mode == :build
    build_mode(argv)
  else
    read_mode(argv)
  end
rescue => error
  print_backtrace(error)
  -1
end
185
+
#
# Wordlist building mode.
#
# Opens a builder for the output wordlist, then feeds it either every
# line from stdin (when no arguments are given) or every given file.
# The builder is always closed afterwards.
#
# @param [Array<String>] argv
#   Additional command-line arguments.
#
# @return [Integer]
#   `0` on success, `-1` if the builder could not be opened.
#
def build_mode(argv)
  begin
    builder = if @format
                Builder.open(@output, format: @format, **@builder_options)
              else
                Builder.open(@output, **@builder_options)
              end
  rescue UnknownFormat, CommandNotFound => error
    print_error(error.message)
    return -1
  end

  begin
    if argv.empty?
      $stdin.each_line { |line| builder.parse(line) }
    else
      argv.each { |file| builder.parse_file(file) }
    end
  ensure
    builder.close
  end

  0
end
220
+
#
# Wordlist reading mode.
#
# Opens every WORDLIST argument, appends them together, applies the
# queued operators and then the queued modifiers, and finally either
# prints each word or runs the `--exec` command once per word.
#
# @param [Array<String>] argv
#   Additional command-line arguments.
#
# @return [Integer]
#   `0` on success, `-1` if no WORDLIST argument was given or a required
#   command could not be found.
#
def read_mode(argv)
  if argv.empty?
    print_error "too few arguments given, requires at least one WORDLIST argument"
    print_error "usage: #{PROGRAM_NAME} [options] WORDLIST ..."
    return -1
  end

  # open the first wordlist
  wordlist = open_wordlist(argv.first)

  # append the additional wordlists
  argv.drop(1).each { |arg| wordlist += open_wordlist(arg) }

  # apply operators first. The result of each operator must be kept (as
  # is done for the modifiers below); discarding it would leave the
  # wordlist unchanged and silently ignore the operator options.
  @operators.each do |(operator,args)|
    wordlist = wordlist.send(operator,*args)
  end

  # then apply modifiers
  @modifiers.each do |(method,args)|
    wordlist = wordlist.send(method,*args)
  end

  begin
    if @command
      wordlist.each do |word|
        # The block form inserts the word verbatim. With a String
        # replacement, backslash sequences in the word (`\0`, `\\`, ...)
        # would be interpreted as back-references.
        #
        # NOTE(review): the word is interpolated into a shell command
        # without escaping; words containing shell metacharacters will be
        # interpreted by the shell. Left as-is because users may already
        # quote "{}" in their command.
        system(@command.gsub('{}') { word })
      end
    else
      wordlist.each do |word|
        puts word
      end
    end
  rescue CommandNotFound => error
    print_error(error.message)
    return -1
  end

  return 0
end
267
+
#
# The option parser.
#
# Builds a new `OptionParser` whose callbacks set the command's mode,
# format, output path, `--exec` command and builder options, and queue
# operators and modifiers via {#add_operator} and {#add_modifier}.
#
# @return [OptionParser]
#
def option_parser
  # Splits a "LEFT:RIGHT" option value. A value without a ":" is rejected
  # as an invalid argument, instead of passing a nil replacement on to the
  # modifier where it would only fail later on.
  split_pair = lambda do |string|
    left, right = string.split(':',2)

    raise(OptionParser::InvalidArgument,string) unless right

    [left, right]
  end

  OptionParser.new do |opts|
    opts.banner = "usage: #{PROGRAM_NAME} { [options] WORDLIST ... | --build WORDLIST [FILE ...] }"

    opts.separator ""
    opts.separator "Wordlist Reading Options:"

    # the accepted values are the keys of FORMATS
    opts.on('-f','--format {txt|gzip|bzip2|xz}', FORMATS, 'Specifies the format of the wordlist') do |format|
      @format = format
    end

    opts.on('--exec COMMAND','Runs the command with each word from the wordlist.', 'The string "{}" will be replaced with each word.') do |command|
      @command = command
    end

    opts.separator ""
    opts.separator "Wordlist Operations:"

    opts.on('-U','--union WORDLIST','Unions the wordlist with the other WORDLIST') do |wordlist|
      add_operator(:|, open_wordlist(wordlist))
    end

    opts.on('-I','--intersect WORDLIST','Intersects the wordlist with the other WORDLIST') do |wordlist|
      add_operator(:&, open_wordlist(wordlist))
    end

    opts.on('-S','--subtract WORDLIST','Subtracts the words from the WORDLIST') do |wordlist|
      add_operator(:-, open_wordlist(wordlist))
    end

    opts.on('-p','--product WORDLIST', 'Combines every word with the other words from WORDLIST') do |wordlist|
      add_operator(:*, open_wordlist(wordlist))
    end

    opts.on('-P','--power NUM', Integer, 'Raises the wordlist to the power of NUM') do |power|
      add_operator(:**, power)
    end

    opts.on('-u','--unique','Filters out duplicate words') do
      add_operator(:uniq)
    end

    opts.separator ""
    opts.separator "Wordlist Modifiers:"

    opts.on('-C','--capitalize','Capitalize each word') do
      add_modifier(:capitalize)
    end

    opts.on('--uppercase', '--upcase','Converts each word to UPPERCASE') do
      add_modifier(:upcase)
    end

    opts.on('--lowercase', '--downcase','Converts each word to lowercase') do
      add_modifier(:downcase)
    end

    opts.on('-t','--tr CHARS:REPLACE','Translates the characters of each word') do |string|
      chars, replace = split_pair.call(string)

      add_modifier(:tr, chars, replace)
    end

    opts.on('-s','--sub PATTERN:SUB','Replaces PATTERN with SUB in each word') do |string|
      pattern, replace = split_pair.call(string)

      add_modifier(:sub, pattern, replace)
    end

    opts.on('-g','--gsub PATTERN:SUB','Replaces all PATTERNs with SUB in each word') do |string|
      pattern, replace = split_pair.call(string)

      add_modifier(:gsub, pattern, replace)
    end

    opts.on('-m','--mutate PATTERN:SUB','Performs every possible substitution on each word') do |string|
      pattern, replace = split_pair.call(string)

      add_modifier(:mutate, pattern, replace)
    end

    opts.on('-M','--mutate-case','Switches the case of each letter in each word') do
      add_modifier(:mutate_case)
    end

    opts.separator ""
    opts.separator "Wordlist Building Options:"

    opts.on('-b','--build WORDLIST','Builds a wordlist') do |wordlist|
      @mode   = :build
      @output = wordlist
    end

    opts.on('-a', '--[no-]append', TrueClass, 'Appends to the new wordlist instead of overwriting it') do |bool|
      @builder_options[:append] = bool
    end

    opts.on('-L','--lang LANG','The language to expect') do |lang|
      @builder_options[:lang] = lang
    end

    opts.on('--stop-words WORDS...','Ignores the stop words') do |words|
      @builder_options[:stop_words] = words.split
    end

    opts.on('--ignore-words WORDS...','Ignore the words') do |words|
      @builder_options[:ignore_words] = words.split
    end

    opts.on('--[no-]digits', TrueClass, 'Allow digits in the middle of words') do |bool|
      @builder_options[:digits] = bool
    end

    opts.on('--special-chars CHARS','Allows the given special characters inside of words') do |string|
      @builder_options[:special_chars] = string.chars
    end

    opts.on('--[no-]numbers', TrueClass, 'Parses whole numbers in addition to words') do |bool|
      @builder_options[:numbers] = bool
    end

    opts.on('--[no-]acronyms', TrueClass, 'Parses acronyms in addition to words') do |bool|
      @builder_options[:acronyms] = bool
    end

    opts.on('--[no-]normalize-case', TrueClass, 'Converts all words to lowercase') do |bool|
      @builder_options[:normalize_case] = bool
    end

    opts.on('--[no-]normalize-apostrophes', TrueClass, 'Removes "\'s" from words') do |bool|
      @builder_options[:normalize_apostrophes] = bool
    end

    opts.on('--[no-]normalize-acronyms', TrueClass, 'Removes the dots from acronyms') do |bool|
      @builder_options[:normalize_acronyms] = bool
    end

    opts.separator ""
    opts.separator "General Options:"

    opts.on('-V','--version','Print the version') do
      puts "#{PROGRAM_NAME} #{VERSION}"
      exit
    end

    opts.on('-h','--help','Print the help output') do
      puts opts
      exit
    end

    opts.separator ""
    opts.separator "Examples:"
    opts.separator "    #{PROGRAM_NAME} rockyou.txt.gz"
    opts.separator "    #{PROGRAM_NAME} passwords_short.txt passwords_long.txt"
    opts.separator "    #{PROGRAM_NAME} sport_teams.txt -p beers.txt -p digits.txt"
    opts.separator "    cat *.txt | #{PROGRAM_NAME} --build custom.txt"
    opts.separator ""
  end
end
432
+
#
# Prints an error message to stderr, prefixed with the program name.
#
# @param [String] error
#   The error message.
#
def print_error(error)
  message = "#{PROGRAM_NAME}: #{error}"

  $stderr.puts(message)
end
442
+
#
# Prints a bug report banner and the exception's full message to stderr.
#
# @param [Exception] exception
#   The exception.
#
def print_backtrace(exception)
  $stderr.puts(
    "Oops! Looks like you've found a bug!",
    "Please report the following text to: #{BUG_REPORT_URL}",
    "",
    "```",
    exception.full_message,
    "```"
  )
end
457
+
458
+ end
459
+ end
@@ -0,0 +1,72 @@
1
+ require 'wordlist/exceptions'
2
+
3
+ require 'shellwords'
4
+
module Wordlist
  module Compression
    #
    # Handles reading compressed files.
    #
    # @since 1.0.0
    #
    module Reader
      # Mapping of compression formats to the commands to read them.
      COMMANDS = {
        gzip:  'zcat',
        bzip2: 'bzcat',
        xz:    'xzcat'
      }.freeze

      #
      # Returns the command to read the compressed wordlist.
      #
      # @param [String] path
      #   The path to the file.
      #
      # @param [:gzip, :bzip2, :xz] format
      #   The compression format of the file.
      #
      # @return [String]
      #   The shellescaped command string.
      #
      # @raise [UnknownFormat]
      #   The given format was not `:gzip`, `:bzip2`, or `:xz`.
      #
      def self.command(path, format: )
        command = COMMANDS.fetch(format) do
          raise(UnknownFormat,"unsupported format: #{format.inspect}")
        end

        Shellwords.shelljoin([command, path])
      end

      #
      # Opens the compressed wordlist for reading.
      #
      # @param [String] path
      #   The path to the file.
      #
      # @param [Hash{Symbol => Object}] kwargs
      #   Additional keyword arguments for {command}.
      #
      # @yield [io]
      #   If a block is given, it is passed to `IO.popen`.
      #
      # @return [IO, Object]
      #   The uncompressed IO stream, or the result of `IO.popen` when a
      #   block is given.
      #
      # @raise [UnknownFormat]
      #   The given format was not `:gzip`, `:bzip2`, or `:xz`.
      #
      # @raise [CommandNotFound]
      #   The `zcat`, `bzcat`, or `xzcat` command could not be found.
      #
      def self.open(path,**kwargs,&block)
        command = self.command(path,**kwargs)

        begin
          # Run the command without a shell. When given as a String, a
          # command containing escaped characters (ex: a path with spaces)
          # is run via /bin/sh, in which case a missing program is reported
          # by the shell and Errno::ENOENT is never raised here.
          IO.popen(Shellwords.shellsplit(command),&block)
        rescue Errno::ENOENT
          raise(CommandNotFound,"#{command.inspect} command not found")
        end
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ require 'wordlist/exceptions'
2
+
3
+ require 'shellwords'
4
+
module Wordlist
  module Compression
    #
    # Handles writing compressed files.
    #
    # @since 1.0.0
    #
    module Writer
      # Mapping of compression formats to the commands to write to them.
      COMMANDS = {
        gzip:  'gzip',
        bzip2: 'bzip2',
        xz:    'xz'
      }.freeze

      #
      # Returns the command to write to the compressed wordlist.
      #
      # @param [String] path
      #   The path to the file.
      #
      # @param [:gzip, :bzip2, :xz] format
      #   The compression format of the file.
      #
      # @param [Boolean] append
      #   Indicates that new words should be appended to the file instead of
      #   overwriting the file.
      #
      # @return [String]
      #   The shellescaped command string.
      #
      # @raise [UnknownFormat]
      #   The given format was not `:gzip`, `:bzip2`, or `:xz`.
      #
      def self.command(path, format: , append: false)
        command = COMMANDS.fetch(format) do
          raise(UnknownFormat,"unsupported format: #{format.inspect}")
        end

        redirect = if append then '>>'
                   else           '>'
                   end

        return "#{Shellwords.shellescape(command)} #{redirect} #{Shellwords.shellescape(path)}"
      end

      #
      # Opens the compressed wordlist for writing.
      #
      # The compression command is run directly (without a shell) with its
      # output redirected to the file. Running the `command` String instead
      # would always go through the shell because of the redirect, in which
      # case a missing program could never be detected here.
      #
      # @param [String] path
      #   The path to the file.
      #
      # @param [:gzip, :bzip2, :xz] format
      #   The compression format of the file.
      #
      # @param [Boolean] append
      #   Indicates that new words should be appended to the file instead of
      #   overwriting the file.
      #
      # @return [IO]
      #   The writable IO stream; data written to it is compressed into the
      #   file.
      #
      # @raise [UnknownFormat]
      #   The given format was not `:gzip`, `:bzip2`, or `:xz`.
      #
      # @raise [CommandNotFound]
      #   The `gzip`, `bzip2`, or `xz` command was not found on the system.
      #
      def self.open(path, format: , append: false)
        program = COMMANDS.fetch(format) do
          raise(UnknownFormat,"unsupported format: #{format.inspect}")
        end

        # Open the output file in this process so that a bad output path is
        # not mistaken for a missing command. `::File` is used explicitly,
        # since `File` may resolve to another constant inside `Wordlist`.
        output = ::File.open(path, append ? 'a' : 'w')

        begin
          IO.popen([program],'w', out: output)
        rescue Errno::ENOENT
          raise(CommandNotFound,"#{program.inspect} command not found")
        ensure
          # the child process has its own copy of the file descriptor
          output.close
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Wordlist
  #
  # Common superclass of the wordlist exception classes defined below.
  #
  # @since 1.0.0
  #
  class WordlistError < RuntimeError; end

  #
  # NOTE(review): presumably raised when a wordlist file does not exist;
  # confirm against the code that raises it.
  #
  # @since 1.0.0
  #
  class WordlistNotFound < WordlistError; end

  #
  # NOTE(review): presumably raised when a wordlist format is not
  # supported; confirm against the code that raises it.
  #
  # @since 1.0.0
  #
  class UnknownFormat < WordlistError; end

  #
  # NOTE(review): presumably raised when an external command cannot be
  # found; confirm against the code that raises it.
  #
  # @since 1.0.0
  #
  class CommandNotFound < WordlistError; end

  #
  # NOTE(review): presumably raised when a language is not supported;
  # confirm against the code that raises it.
  #
  # @since 1.0.0
  #
  class UnsupportedLanguage < WordlistError; end
end