RubyGems - wordlist - Versions diffs - 0.1.1 → 1.0.0 - Mend

wordlist 0.1.1 → 1.0.0

Files changed (148) hide show

checksums.yaml +7 -0
data/.github/workflows/ruby.yml +27 -0
data/.gitignore +6 -3
data/ChangeLog.md +45 -1
data/Gemfile +13 -0
data/LICENSE.txt +1 -3
data/README.md +266 -61
data/Rakefile +7 -32
data/benchmarks.rb +115 -0
data/bin/wordlist +4 -7
data/data/stop_words/ar.txt +104 -0
data/data/stop_words/bg.txt +259 -0
data/data/stop_words/bn.txt +363 -0
data/data/stop_words/ca.txt +126 -0
data/data/stop_words/cs.txt +138 -0
data/data/stop_words/da.txt +101 -0
data/data/stop_words/de.txt +129 -0
data/data/stop_words/el.txt +79 -0
data/data/stop_words/en.txt +175 -0
data/data/stop_words/es.txt +178 -0
data/data/stop_words/eu.txt +98 -0
data/data/stop_words/fa.txt +332 -0
data/data/stop_words/fi.txt +747 -0
data/data/stop_words/fr.txt +116 -0
data/data/stop_words/ga.txt +109 -0
data/data/stop_words/gl.txt +160 -0
data/data/stop_words/he.txt +499 -0
data/data/stop_words/hi.txt +97 -0
data/data/stop_words/hr.txt +179 -0
data/data/stop_words/hu.txt +35 -0
data/data/stop_words/hy.txt +45 -0
data/data/stop_words/id.txt +357 -0
data/data/stop_words/it.txt +134 -0
data/data/stop_words/ja.txt +44 -0
data/data/stop_words/ko.txt +677 -0
data/data/stop_words/ku.txt +63 -0
data/data/stop_words/lt.txt +507 -0
data/data/stop_words/lv.txt +163 -0
data/data/stop_words/mr.txt +99 -0
data/data/stop_words/nl.txt +48 -0
data/data/stop_words/no.txt +172 -0
data/data/stop_words/pl.txt +138 -0
data/data/stop_words/pt.txt +147 -0
data/data/stop_words/ro.txt +281 -0
data/data/stop_words/ru.txt +421 -0
data/data/stop_words/sk.txt +173 -0
data/data/stop_words/sv.txt +386 -0
data/data/stop_words/th.txt +115 -0
data/data/stop_words/tr.txt +114 -0
data/data/stop_words/uk.txt +28 -0
data/data/stop_words/ur.txt +513 -0
data/data/stop_words/zh.txt +125 -0
data/gemspec.yml +4 -10
data/lib/wordlist/abstract_wordlist.rb +24 -0
data/lib/wordlist/builder.rb +170 -138
data/lib/wordlist/cli.rb +458 -0
data/lib/wordlist/compression/reader.rb +72 -0
data/lib/wordlist/compression/writer.rb +80 -0
data/lib/wordlist/exceptions.rb +31 -0
data/lib/wordlist/file.rb +176 -0
data/lib/wordlist/format.rb +38 -0
data/lib/wordlist/lexer/lang.rb +32 -0
data/lib/wordlist/lexer/stop_words.rb +68 -0
data/lib/wordlist/lexer.rb +218 -0
data/lib/wordlist/list_methods.rb +462 -0
data/lib/wordlist/modifiers/capitalize.rb +45 -0
data/lib/wordlist/modifiers/downcase.rb +45 -0
data/lib/wordlist/modifiers/gsub.rb +51 -0
data/lib/wordlist/modifiers/modifier.rb +44 -0
data/lib/wordlist/modifiers/mutate.rb +133 -0
data/lib/wordlist/modifiers/mutate_case.rb +25 -0
data/lib/wordlist/modifiers/sub.rb +97 -0
data/lib/wordlist/modifiers/tr.rb +71 -0
data/lib/wordlist/modifiers/upcase.rb +45 -0
data/lib/wordlist/modifiers.rb +8 -0
data/lib/wordlist/operators/binary_operator.rb +38 -0
data/lib/wordlist/operators/concat.rb +47 -0
data/lib/wordlist/operators/intersect.rb +55 -0
data/lib/wordlist/operators/operator.rb +29 -0
data/lib/wordlist/operators/power.rb +72 -0
data/lib/wordlist/operators/product.rb +50 -0
data/lib/wordlist/operators/subtract.rb +54 -0
data/lib/wordlist/operators/unary_operator.rb +29 -0
data/lib/wordlist/operators/union.rb +61 -0
data/lib/wordlist/operators/unique.rb +52 -0
data/lib/wordlist/operators.rb +7 -0
data/lib/wordlist/unique_filter.rb +40 -61
data/lib/wordlist/version.rb +1 -1
data/lib/wordlist/words.rb +71 -0
data/lib/wordlist.rb +103 -2
data/spec/abstract_list_spec.rb +18 -0
data/spec/builder_spec.rb +220 -76
data/spec/cli_spec.rb +801 -0
data/spec/compression/reader_spec.rb +137 -0
data/spec/compression/writer_spec.rb +194 -0
data/spec/file_spec.rb +258 -0
data/spec/fixtures/wordlist.txt +15 -0
data/spec/fixtures/wordlist.txt.bz2 +0 -0
data/spec/fixtures/wordlist.txt.gz +0 -0
data/spec/fixtures/wordlist.txt.xz +0 -0
data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
data/spec/fixtures/wordlist_with_comments.txt +19 -0
data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
data/spec/format_spec.rb +50 -0
data/spec/helpers/text.rb +3 -3
data/spec/helpers/wordlist.rb +2 -2
data/spec/lexer/lang_spec.rb +70 -0
data/spec/lexer/stop_words_spec.rb +77 -0
data/spec/lexer_spec.rb +652 -0
data/spec/list_methods_spec.rb +181 -0
data/spec/modifiers/capitalize_spec.rb +27 -0
data/spec/modifiers/downcase_spec.rb +27 -0
data/spec/modifiers/gsub_spec.rb +59 -0
data/spec/modifiers/modifier_spec.rb +20 -0
data/spec/modifiers/mutate_case_spec.rb +46 -0
data/spec/modifiers/mutate_spec.rb +39 -0
data/spec/modifiers/sub_spec.rb +98 -0
data/spec/modifiers/tr_spec.rb +46 -0
data/spec/modifiers/upcase_spec.rb +27 -0
data/spec/operators/binary_operator_spec.rb +19 -0
data/spec/operators/concat_spec.rb +26 -0
data/spec/operators/intersect_spec.rb +37 -0
data/spec/operators/operator_spec.rb +16 -0
data/spec/operators/power_spec.rb +57 -0
data/spec/operators/product_spec.rb +39 -0
data/spec/operators/subtract_spec.rb +37 -0
data/spec/operators/union_spec.rb +37 -0
data/spec/operators/unique_spec.rb +25 -0
data/spec/spec_helper.rb +2 -1
data/spec/unique_filter_spec.rb +108 -18
data/spec/wordlist_spec.rb +55 -3
data/spec/words_spec.rb +41 -0
metadata +183 -120
data/lib/wordlist/builders/website.rb +0 -216
data/lib/wordlist/builders.rb +0 -1
data/lib/wordlist/flat_file.rb +0 -47
data/lib/wordlist/list.rb +0 -162
data/lib/wordlist/mutator.rb +0 -113
data/lib/wordlist/parsers.rb +0 -74
data/lib/wordlist/runners/list.rb +0 -116
data/lib/wordlist/runners/runner.rb +0 -67
data/lib/wordlist/runners.rb +0 -2
data/scripts/benchmark +0 -59
data/scripts/text/comedy_of_errors.txt +0 -4011
data/spec/flat_file_spec.rb +0 -25
data/spec/list_spec.rb +0 -58
data/spec/mutator_spec.rb +0 -43
data/spec/parsers_spec.rb +0 -118

data/spec/lexer_spec.rb ADDED Viewed

@@ -0,0 +1,652 @@
+require 'spec_helper'
+require 'wordlist/lexer'
+describe Wordlist::Lexer do
+  # Sample text for the examples; the "#parse" group below defines its own.
+  let(:text) { "foo bar baz qux" }
+  # Asserts that the described class includes Enumerable.
+  it { expect(described_class).to include(Enumerable) }
+  describe "#initialize" do
+    let(:default_lang) { Wordlist::Lexer::Lang.default }
+    it "must default #lang to Lang.default_lang" do
+      expect(subject.lang).to eq(default_lang)
+    end
+    it "must set #stop_words to the stop words for :en" do
+      expect(subject.stop_words).to eq(Wordlist::Lexer::StopWords[default_lang])
+    end
+    it "must default #ignore_words to []" do
+      expect(subject.ignore_words).to eq([])
+    end
+    it "must default #digits? to true" do
+      expect(subject.digits?).to be(true)
+    end
+    it "must default #special_chars to SPECIAL_CHARS" do
+      expect(subject.special_chars).to eq(described_class::SPECIAL_CHARS)
+    end
+    it "must default #numbers? to false" do
+      expect(subject.numbers?).to be(false)
+    end
+    it "must default #acroynyms? to true" do
+      expect(subject.acronyms?).to be(true)
+    end
+    it "must default #normalize_case? to false" do
+      expect(subject.normalize_case?).to be(false)
+    end
+    it "must default #normalize_apostrophes? to false" do
+      expect(subject.normalize_apostrophes?).to be(false)
+    end
+    it "must default #normalize_acroynyms? to false" do
+      expect(subject.normalize_acronyms?).to be(false)
+    end
+    context "when the lang: keyword is given" do
+      let(:lang) { :es }
+      subject { described_class.new(lang: lang) }
+      it "must set #lang" do
+        expect(subject.lang).to eq(lang)
+      end
+      it "must set #stop_words to the stop words for that language" do
+        expect(subject.stop_words).to eq(Wordlist::Lexer::StopWords[lang])
+      end
+    end
+    context "when ignore_words: keyword argument is given" do
+      let(:ignore_words) { %w[foo bar] }
+      subject { described_class.new(ignore_words: ignore_words) }
+      it "must set #ignore_words" do
+        expect(subject.ignore_words).to eq(ignore_words)
+      end
+      context "and it contains an object other than a String or Regexp" do
+        it do
+          expect {
+            described_class.new(ignore_words: [Object.new])
+          }.to raise_error(ArgumentError,"ignore_words: must contain only Strings or Regexps")
+        end
+      end
+    end
+    context "when digits: false is given" do
+      subject { described_class.new(digits: false) }
+      it "must set #digits? to false" do
+        expect(subject.digits?).to be(false)
+      end
+    end
+    context "when special_chars: keyword is given" do
+      let(:special_chars) { %w[_ -] }
+      subject { described_class.new(special_chars: special_chars) }
+      it "must set #special_chars" do
+        expect(subject.special_chars).to eq(special_chars)
+      end
+    end
+    context "when numbers: true is given" do
+      subject { described_class.new(numbers: true) }
+      it "must set #numbers? to true" do
+        expect(subject.numbers?).to be(true)
+      end
+    end
+    context "when acronyms: true is given" do
+      subject { described_class.new(acronyms: true) }
+      it "must set #acronyms? to true" do
+        expect(subject.acronyms?).to be(true)
+      end
+    end
+    context "when normalize_case: true is given" do
+      subject { described_class.new(normalize_case: true) }
+      it "must set #normalize_case? to true" do
+        expect(subject.normalize_case?).to be(true)
+      end
+    end
+    context "when normalize_apostrophes: true is given" do
+      subject { described_class.new(normalize_apostrophes: true) }
+      it "must set #normalize_apostrophes? to true" do
+        expect(subject.normalize_apostrophes?).to be(true)
+      end
+    end
+    context "when normalize_acronyms: true is given" do
+      subject { described_class.new(normalize_acronyms: true) }
+      it "must set #normalize_acronyms? to true" do
+        expect(subject.normalize_acronyms?).to be(true)
+      end
+    end
+  end
+  describe "#parse" do
+    # Words the examples expect back, unless a nested context overrides them.
+    let(:expected_words) { %w[foo bar baz qux] }
+    # Input text: the expected words joined by single spaces.
+    let(:text) { expected_words.join(' ') }
+    context "when a block is given" do
+      it "must yield each scanned word from the text" do
+        expect { |b|
+          subject.parse(text,&b)
+        }.to yield_successive_args(*expected_words)
+      end
+      context "when the words contain uppercase letters" do
+        let(:expected_words) { %w[foo Bar baZ QUX] }
+        it "must parse words containing uppercase letters" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+        context "but when initialized with normalize_case: true" do
+          let(:expected_words) { %w[foo bar baz qux] }
+          let(:text)           { "foo Bar baZ QUX"   }
+          subject { described_class.new(normalize_case: true) }
+          it "must convert all words to lowercase" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+      end
+      context "and when the text contains single letters" do
+        let(:letters)        { %w[x y z]         }
+        let(:expected_words) { super() + letters }
+        it "must parse single letter words" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+        context "when the text also contains single letter stop words" do
+          let(:letters)        { %w[a b c i j k] }
+          let(:stop_words)     { %w[a i]         }
+          let(:expected_words) { super() - stop_words }
+          let(:text)           { "#{super()} #{stop_words.join(' ')}" }
+          it "must parse single letter words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+      end
+      context "and when the text contains newlines" do
+        let(:text) { expected_words.join("\n") }
+        it "must parse each line" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+      end
+      context "and when the text contains punctuation" do
+        let(:text) { expected_words.join(", ") + '.' }
+        it "must ignore all punctuation" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+        context "and the words start with a '\\'' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo 'bar baz"  }
+          it "must skip the leading '\\' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words contain '\\'' characters" do
+          let(:expected_words) { super() + %w[O'Brian] }
+          it "must parse the words containing a '\\''" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "and when initialized with normalize_apostrophes: true" do
+            let(:text)           { "foo bar's baz" }
+            let(:expected_words) { %w[foo bar baz] }
+            subject { described_class.new(normalize_apostrophes: true) }
+            it "must remove any trailing \"'s\" from the words" do
+              expect { |b|
+                subject.parse(text,&b)
+              }.to yield_successive_args(*expected_words)
+            end
+          end
+        end
+        context "and the words end with a '\\'' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo bar' baz"  }
+          it "must skip the trailing '\\' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words start with a '-' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo -bar baz"  }
+          it "must skip the leading '-' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words contain '-' characters" do
+          let(:expected_words) { %w[foo-bar baz-qux] }
+          it "must parse words containing a '-'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "and when initialized with chars: keyword argument" do
+            context "and it does not include '-'" do
+              let(:text)           { "foo bar-baz qux"   }
+              let(:expected_words) { %w[foo bar baz qux] }
+              subject { described_class.new(special_chars: ['_']) }
+              it "must split hyphenated words into multiple words" do
+                expect { |b|
+                  subject.parse(text,&b)
+                }.to yield_successive_args(*expected_words)
+              end
+            end
+          end
+        end
+        context "and the words end with a '-' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo bar- baz"  }
+          it "must skip the trailing '-' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words start with a '_' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo _bar baz"  }
+          it "must skip the leading '_' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words contain '_' characters" do
+          let(:expected_words) { %w[foo_bar baz_qux] }
+          it "must treat the words containing a '_' as a single word" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "and when initialized with chars: keyword argument" do
+            context "and it does not include '_'" do
+              let(:text)           { "foo bar_baz qux"   }
+              let(:expected_words) { %w[foo bar baz qux] }
+              subject { described_class.new(special_chars: ['-']) }
+              it "must split hyphenated words into multiple words" do
+                expect { |b|
+                  subject.parse(text,&b)
+                }.to yield_successive_args(*expected_words)
+              end
+            end
+          end
+        end
+        context "and the words end with a '_' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo bar_ baz"  }
+          it "must skip the trailing '_' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words start with a '.' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo .bar baz"  }
+          it "must skip the leading '.' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and the words contain '.' characters" do
+          let(:text)           { "foo.bar baz.qux"  }
+          let(:expected_words) { %w[foo bar baz qux] }
+          it "must split words containing '.' into multiple words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "and when initialized with chars: keyword argument" do
+            context "and it does include '.'" do
+              let(:expected_words) { %w[foo.bar baz.qux] }
+              subject { described_class.new(special_chars: ['.']) }
+              it "must treat words containing a '.' as a single word" do
+                expect { |b|
+                  subject.parse(text,&b)
+                }.to yield_successive_args(*expected_words)
+              end
+            end
+          end
+        end
+        context "and the words end with a '.' characters" do
+          let(:expected_words) { %w[foo bar baz] }
+          let(:text)           { "foo bar. baz"  }
+          it "must skip the trailing '.' character'" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "but the word is an acronym" do
+            let(:expected_words) { %w[foo B.A.R. baz] }
+            let(:text)           { "foo B.A.R. baz"  }
+            it "must parse whole acronyms'" do
+              expect { |b|
+                subject.parse(text,&b)
+              }.to yield_successive_args(*expected_words)
+            end
+            context "but was initialized with acronyms: false" do
+              let(:expected_words) { %w[foo baz] }
+              subject { described_class.new(acronyms: false) }
+              it "must skip the whole acronyms" do
+                expect { |b|
+                  subject.parse(text,&b)
+                }.to yield_successive_args(*expected_words)
+              end
+            end
+            context "and when initialized with normalize_acronyms: true" do
+              let(:expected_words) { %w[foo BAR baz] }
+              let(:text)           { "foo B.A.R. baz"  }
+              subject { described_class.new(normalize_acronyms: true) }
+              it "must remove the '.' characters from acronyms" do
+                expect { |b|
+                  subject.parse(text,&b)
+                }.to yield_successive_args(*expected_words)
+              end
+            end
+          end
+        end
+      end
+      context "and when the text contains numbers" do
+        let(:text) { expected_words.join(" 1234 ") }
+        it "must ignore whole numbers" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+        context "when initialized with numbers: true" do
+          let(:expected_words) { %w[foo 1234 bar 000 baz 0123] }
+          let(:text)           { expected_words.join(' ')      }
+          subject { described_class.new(numbers: true) }
+          it "must parse whole numbers" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "but the text also contains words that start with digits" do
+          let(:text) { expected_words.map { |word| "123#{word}" }.join(' ') }
+          it "must ignore the leading digits of words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "but the text also contains words that contain digits" do
+          let(:expected_words) { %w[foo bar1baz qux] }
+          it "must not ignore the digits within words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "but when initialized with digits: false" do
+            let(:expected_words) { %w[foo bar baz qux] }
+            let(:text)           { "foo bar2baz qux"   }
+            subject { described_class.new(digits: false) }
+            it "must ignore the leading digits within words" do
+              expect { |b|
+                subject.parse(text,&b)
+              }.to yield_successive_args(*expected_words)
+            end
+          end
+        end
+        context "but the text also contains words that end in digits" do
+          let(:expected_words) { super().map { |word| "#{word}123" } }
+          it "must not ignore the digits within words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+          context "but when initialized with digits: false" do
+            let(:expected_words) { %w[foo bar baz qux] }
+            let(:text)           { "foo bar2 baz qux4" }
+            subject { described_class.new(digits: false) }
+            it "must ignore the leading digits within words" do
+              expect { |b|
+                subject.parse(text,&b)
+              }.to yield_successive_args(*expected_words)
+            end
+          end
+        end
+      end
+      context "and when the text contains new-lines" do
+        let(:text) { expected_words.join($/) }
+        it "must ignore new-line characters" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+      end
+      context "and when the text contains stop-words" do
+        let(:stop_words) { %w[a the is be] }
+        let(:text)       { expected_words.zip(stop_words).flatten.join(' ') }
+        it "must ignore the stop-words and parse the non-stop-words" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+        context "and when the stop words are capitlized" do
+          let(:stop_words) { super().map(&:capitalize) }
+          it "must ignore the capitlized stop-words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when the stop words are uppercase" do
+          let(:stop_words) { super().map(&:upcase) }
+          it "must ignore the uppercase stop-words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when the text ends with a stop word" do
+          let(:text) { "#{super()} is" }
+          it "must ignore the last stop word" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when a stop word is followed by other letters" do
+          let(:stop_word)      { "be" }
+          let(:expected_words) { super() + ["#{stop_word}tter"] }
+          it "must not ignore stop words followed by other letters" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when a stop word is followed by digits" do
+          let(:stop_word)      { "a" }
+          let(:expected_words) { super() + ["#{stop_word}1234"] }
+          it "must not ignore stop words followed by digits" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when a stop word is followed by punctuation" do
+          let(:stop_words) { %w[is. be, the?] }
+          it "must not ignore stop words followed by punctuation" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+        context "and when the text contains multiple successive stop-words" do
+          let(:text) { (stop_words + expected_words).join(' ') }
+          it "must ignore multiple successive stop-words" do
+            expect { |b|
+              subject.parse(text,&b)
+            }.to yield_successive_args(*expected_words)
+          end
+        end
+      end
+      context "and when #ignore_words contains a String" do
+        let(:ignore_words)   { %w[foo baz] }
+        let(:expected_words) { %w[bar qux] }
+        let(:text)           { "foo bar baz qux" }
+        subject { described_class.new(ignore_words: ignore_words) }
+        it "must filter out words matching that String" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+      end
+      context "and when #ignore_words contains a Regexp" do
+        let(:ignore_words)   { [/ba[a-z]/] }
+        let(:expected_words) { %w[foo qux] }
+        let(:text)           { "foo bar baz qux" }
+        subject { described_class.new(ignore_words: ignore_words) }
+        it "must filter out words matching that Regexp" do
+          expect { |b|
+            subject.parse(text,&b)
+          }.to yield_successive_args(*expected_words)
+        end
+      end
+    end
+    context "when no block is given" do
+      it "must return an Array of the parsed words" do
+        expect(subject.parse(text)).to eq(expected_words)
+      end
+    end
+  end
+end