pragmatic_tokenizer 1.4.0 → 1.5.0

Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +184 -0
  3. data/.rubocop_todo.yml +66 -0
  4. data/README.md +0 -7
  5. data/Rakefile +1 -1
  6. data/lib/pragmatic_tokenizer/ending_punctuation_separator.rb +2 -2
  7. data/lib/pragmatic_tokenizer/full_stop_separator.rb +6 -6
  8. data/lib/pragmatic_tokenizer/languages/arabic.rb +1 -1
  9. data/lib/pragmatic_tokenizer/languages/bulgarian.rb +1 -1
  10. data/lib/pragmatic_tokenizer/languages/catalan.rb +1 -1
  11. data/lib/pragmatic_tokenizer/languages/common.rb +4 -4
  12. data/lib/pragmatic_tokenizer/languages/czech.rb +1 -1
  13. data/lib/pragmatic_tokenizer/languages/danish.rb +2 -2
  14. data/lib/pragmatic_tokenizer/languages/deutsch.rb +94 -23
  15. data/lib/pragmatic_tokenizer/languages/dutch.rb +2 -2
  16. data/lib/pragmatic_tokenizer/languages/english.rb +91 -91
  17. data/lib/pragmatic_tokenizer/languages/finnish.rb +2 -2
  18. data/lib/pragmatic_tokenizer/languages/french.rb +1 -1
  19. data/lib/pragmatic_tokenizer/languages/greek.rb +1 -1
  20. data/lib/pragmatic_tokenizer/languages/indonesian.rb +2 -2
  21. data/lib/pragmatic_tokenizer/languages/italian.rb +1 -1
  22. data/lib/pragmatic_tokenizer/languages/latvian.rb +1 -1
  23. data/lib/pragmatic_tokenizer/languages/norwegian.rb +2 -2
  24. data/lib/pragmatic_tokenizer/languages/persian.rb +2 -2
  25. data/lib/pragmatic_tokenizer/languages/polish.rb +2 -2
  26. data/lib/pragmatic_tokenizer/languages/portuguese.rb +2 -2
  27. data/lib/pragmatic_tokenizer/languages/romanian.rb +1 -1
  28. data/lib/pragmatic_tokenizer/languages/russian.rb +2 -2
  29. data/lib/pragmatic_tokenizer/languages/slovak.rb +2 -2
  30. data/lib/pragmatic_tokenizer/languages/spanish.rb +3 -3
  31. data/lib/pragmatic_tokenizer/languages/swedish.rb +2 -2
  32. data/lib/pragmatic_tokenizer/languages/turkish.rb +2 -2
  33. data/lib/pragmatic_tokenizer/languages.rb +28 -28
  34. data/lib/pragmatic_tokenizer/post_processor.rb +38 -24
  35. data/lib/pragmatic_tokenizer/pre_processor.rb +148 -118
  36. data/lib/pragmatic_tokenizer/tokenizer.rb +160 -135
  37. data/lib/pragmatic_tokenizer/version.rb +1 -1
  38. data/pragmatic_tokenizer.gemspec +1 -0
  39. data/spec/languages/bulgarian_spec.rb +17 -13
  40. data/spec/languages/deutsch_spec.rb +110 -86
  41. data/spec/languages/english_spec.rb +465 -342
  42. data/spec/languages/french_spec.rb +3 -2
  43. data/spec/performance_spec.rb +7 -7
  44. data/spec/pragmatic_tokenizer_spec.rb +8 -8
  45. metadata +18 -2
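
The full diff shown below is for data/spec/languages/english_spec.rb (file 41 in the list). Most of the churn is mechanical: RuboCop configuration was added (files 2 and 3), multi-line PragmaticTokenizer::Tokenizer.new calls were re-wrapped so the text argument sits on its own line, and plain word arrays were rewritten as %w() literals. As a minimal sketch of the call pattern these specs exercise (input string, options, and expected tokens copied verbatim from the spec below):

    require 'pragmatic_tokenizer'

    # Call layout used throughout the 1.5.0 specs: the text is the first
    # positional argument and the options follow as a hash.
    pt = PragmaticTokenizer::Tokenizer.new(
      'His name is Mr. Smith.',
      language:    'en',    # selects abbreviation list, stop words, contractions
      punctuation: 'none'   # drop punctuation tokens from the output
    )
    pt.tokenize
    # => ["his", "name", "is", "mr.", "smith"]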
data/spec/languages/english_spec.rb
@@ -3,7 +3,6 @@ require 'spec_helper'
  describe PragmaticTokenizer do
  context 'Language: English (en)' do
  context '#tokenize (example strings)' do
-
  context 'no options selected' do
  it 'tokenizes a string #001' do
  text = "Hello world."
@@ -171,7 +170,7 @@ describe PragmaticTokenizer do
  text = "U.S.A. U.S.A. U.S.A."
  pt = PragmaticTokenizer::Tokenizer.new(text)
  expect(pt.tokenize).to eq(
- ["u.s.a.", "u.s.a.", "u.s.a."]
+ ["u.s.a.", "u.s.a.", "u.s.a."]
  )
  end
  end
@@ -186,8 +185,9 @@ describe PragmaticTokenizer do
  it 'fails to recognize an English abbreviation if the user supplies an abbreviations array without it' do
  text = "Mr. Smith, hello world."
  abbreviations = ['mrs']
- pt = PragmaticTokenizer::Tokenizer.new(text,
- abbreviations: abbreviations
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ abbreviations: abbreviations
  )
  expect(pt.tokenize).to eq(["mr", ".", "smith", ",", "hello", "world", "."])
  end
@@ -195,8 +195,9 @@ describe PragmaticTokenizer do
  it 'recognizes a user-supplied abbreviation' do
  text = "thisisnotanormalabbreviation. hello world."
  abbreviations = ['thisisnotanormalabbreviation']
- pt = PragmaticTokenizer::Tokenizer.new(text,
- abbreviations: abbreviations
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ abbreviations: abbreviations
  )
  expect(pt.tokenize).to eq(["thisisnotanormalabbreviation.", "hello", "world", "."])
  end
@@ -204,16 +205,18 @@ describe PragmaticTokenizer do
  it 'handles an empty user-supplied abbreviation array' do
  text = "thisisnotanormalabbreviation. hello world."
  abbreviations = []
- pt = PragmaticTokenizer::Tokenizer.new(text,
- abbreviations: abbreviations
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ abbreviations: abbreviations
  )
  expect(pt.tokenize).to eq(["thisisnotanormalabbreviation", ".", "hello", "world", "."])
  end

  it 'handles abrreviations across multiple languages' do
  text = "Mr. Smith how are ü. today."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- filter_languages: [:en, :de]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ filter_languages: [:en, :de]
  )
  expect(pt.tokenize).to eq(["mr.", "smith", "how", "are", "ü.", "today", "."])
  end
@@ -221,9 +224,10 @@ describe PragmaticTokenizer do
  it 'handles abrreviations across multiple languages and user-supplied abbreviations' do
  text = "Adj. Smith how are ü. today. thisisnotanormalabbreviation. is it?"
  abbreviations = ['thisisnotanormalabbreviation']
- pt = PragmaticTokenizer::Tokenizer.new(text,
- filter_languages: [:en, :de],
- abbreviations: abbreviations
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ filter_languages: [:en, :de],
+ abbreviations: abbreviations
  )
  expect(pt.tokenize).to eq(["adj.", "smith", "how", "are", "ü.", "today", ".", "thisisnotanormalabbreviation.", "is", "it", "?"])
  end
@@ -240,9 +244,10 @@ describe PragmaticTokenizer do
  it 'expands user-supplied contractions' do
  text = "Hello supa'soo guy."
  contractions = { "supa'soo" => "super smooth" }
- pt = PragmaticTokenizer::Tokenizer.new(text,
- contractions: contractions,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ contractions: contractions,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(["hello", "super", "smooth", "guy", "."])
  end
@@ -250,29 +255,32 @@ describe PragmaticTokenizer do
  it 'does not expands user-supplied contractions' do
  text = "Hello supa'soo guy."
  contractions = { "supa'soo" => "super smooth" }
- pt = PragmaticTokenizer::Tokenizer.new(text,
- contractions: contractions,
- expand_contractions: false
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ contractions: contractions,
+ expand_contractions: false
  )
- expect(pt.tokenize).to eq( ["hello", "supa'soo", "guy", "."])
+ expect(pt.tokenize).to eq(["hello", "supa'soo", "guy", "."])
  end

  it 'expands user-supplied contractions and language contractions' do
  text = "Hello supa'soo guy. auf's wasn't it?"
  contractions = { "supa'soo" => "super smooth" }
- pt = PragmaticTokenizer::Tokenizer.new(text,
- contractions: contractions,
- expand_contractions: true,
- filter_languages: [:en, :de]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ contractions: contractions,
+ expand_contractions: true,
+ filter_languages: [:en, :de]
  )
  expect(pt.tokenize).to eq(["hello", "super", "smooth", "guy", ".", "auf", "das", "was", "not", "it", "?"])
  end

  it 'expands language contractions' do
  text = "Hello supa'soo guy. auf's wasn't it?"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true,
- filter_languages: [:en, :de]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true,
+ filter_languages: [:en, :de]
  )
  expect(pt.tokenize).to eq(["hello", "supa'soo", "guy", ".", "auf", "das", "was", "not", "it", "?"])
  end
@@ -280,8 +288,9 @@ describe PragmaticTokenizer do
  it 'tokenizes a string #001' do
  # https://www.ibm.com/developerworks/community/blogs/nlp/entry/tokenization?lang=en
  text = "\"I said, 'what're you? Crazy?'\" said Sandowsky. \"I can't afford to do that.\""
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(['"', 'i', 'said', ',', "'", 'what', 'are', 'you', '?', 'crazy', '?', "'", '"', 'said', 'sandowsky', '.', '"', 'i', 'cannot', 'afford', 'to', 'do', 'that', '.', '"'])
  end
@@ -289,24 +298,27 @@ describe PragmaticTokenizer do
  it 'tokenizes a string #002' do
  # http://nlp.stanford.edu/software/tokenizer.shtml
  text = "\"Oh, no,\" she's saying, \"our $400 blender can't handle something this hard!\""
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(['"', 'oh', ',', 'no', ',', '"', 'she', 'is', 'saying', ',', '"', 'our', '$400', 'blender', 'cannot', 'handle', 'something', 'this', 'hard', '!', '"'])
  end

  it 'tokenizes a string #003' do
  text = "Look for his/her account."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(["look", "for", "his", "her", "account", "."])
  end

  it 'tokenizes a string #004' do
  text = "I like apples and/or oranges."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(["i", "like", "apples", "and", "or", "oranges", "."])
  end
@@ -315,8 +327,9 @@ describe PragmaticTokenizer do
  context 'option (emojis)' do
  it 'removes emoji' do
  text = "Return the emoji 👿😍😱🐔🌚. 🌚"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emoji: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emoji: true
  )
  expect(pt.tokenize).to eq(["return", "the", "emoji", "."])
  end
@@ -329,24 +342,27 @@ describe PragmaticTokenizer do

  it 'removes snowflakes 1' do
  text = "hello❄️❄️❄️"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emoji: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emoji: true
  )
  expect(pt.tokenize).to eq(["hello"])
  end

  it 'removes snowflakes 2' do
  text = "hello\u2744\uFE0E\u2744\uFE0E\u2744\uFE0E"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emoji: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emoji: true
  )
  expect(pt.tokenize).to eq(["hello"])
  end

  it 'removes snowflakes 3' do
  text = "hello\u2744\u2744\u2744"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emoji: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emoji: true
  )
  expect(pt.tokenize).to eq(["hello"])
  end
@@ -355,24 +371,27 @@ describe PragmaticTokenizer do
  context 'option (hashtags)' do
  it 'tokenizes a string #001' do
  text = "This is a #hashtag yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- hashtags: :remove
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ hashtags: :remove
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "yay", "!"])
  end

  it 'tokenizes a string #002' do
  text = "This is a #hashtag yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- hashtags: :keep_and_clean
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ hashtags: :keep_and_clean
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "hashtag", "yay", "!"])
  end

  it 'tokenizes a string #003' do
  text = "This is a #hashtag yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- hashtags: :keep_original
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ hashtags: :keep_original
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "#hashtag", "yay", "!"])
  end
@@ -381,24 +400,27 @@ describe PragmaticTokenizer do
  context 'option (mentions)' do
  it 'tokenizes a string #001' do
  text = "This is a @mention @mention2 yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- mentions: :remove
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ mentions: :remove
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "yay", "!"])
  end

  it 'tokenizes a string #002' do
  text = "This is a @mention @mention2 yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- mentions: :keep_and_clean
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ mentions: :keep_and_clean
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "mention", "mention2", "yay", "!"])
  end

  it 'tokenizes a string #003' do
  text = "This is a @mention @mention2 yay!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- mentions: :keep_original
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ mentions: :keep_original
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "@mention", "@mention2", "yay", "!"])
  end
@@ -407,8 +429,9 @@ describe PragmaticTokenizer do
  context 'option (email addresses)' do
  it 'tokenizes a string #001' do
  text = "Here are some emails jon@hotmail.com ben123@gmail.com."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emails: :true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emails: :true
  )
  expect(pt.tokenize).to eq(["here", "are", "some", "emails", "."])
  end
@@ -421,8 +444,9 @@ describe PragmaticTokenizer do

  it 'knows what is not an email address' do
  text = "the great cook.@someone something else@whoever"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_emails: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_emails: true
  )
  expect(pt.tokenize).to eq(["the", "great", "cook", ".", "@someone", "something", "else@whoever"])
  end
@@ -431,8 +455,9 @@ describe PragmaticTokenizer do
  context 'option (urls)' do
  it 'tokenizes a string #001' do
  text = "Here are some domains and urls google.com https://www.google.com www.google.com."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_urls: :true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_urls: :true
  )
  expect(pt.tokenize).to eq(["here", "are", "some", "domains", "and", "urls", "google.com", "www.google.com", "."])
  end
@@ -447,8 +472,9 @@ describe PragmaticTokenizer do
  context 'option (domains)' do
  it 'tokenizes a string #001' do
  text = "Here are some domains and urls google.com https://www.google.com www.google.com."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_domains: :true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_domains: :true
  )
  expect(pt.tokenize).to eq(["here", "are", "some", "domains", "and", "urls", "https://www.google.com", "."])
  end
@@ -462,24 +488,27 @@ describe PragmaticTokenizer do
  it 'knows what is not a domain 1' do
  skip "NOT IMPLEMENTED"
  text = "this is a sentence.and no domain."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_domains: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_domains: true
  )
  expect(pt.tokenize).to eq(["this", "is", "a", "sentence", ".", "and", "no", "domain", "."])
  end

  it 'knows what is not a domain 2' do
  text = "former president g.w.bush was..."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_domains: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_domains: true
  )
  expect(pt.tokenize).to eq(["former", "president", "g.w.bush", "was", "..."])
  end

  it 'knows what is not a domain 3' do
  text = "2.something-times"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- remove_domains: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ remove_domains: true
  )
  expect(pt.tokenize).to eq(["2.something-times"])
  end
@@ -488,16 +517,18 @@ describe PragmaticTokenizer do
  context 'option (long_word_split)' do
  it 'tokenizes a string #001' do
  text = "Some main-categories of the mathematics-test have sub-examples that most 14-year olds can't answer, therefor the implementation-instruction made in the 1990-years needs to be revised."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- long_word_split: 10
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ long_word_split: 10
  )
  expect(pt.tokenize).to eq(["some", "main", "categories", "of", "the", "mathematics", "test", "have", "sub", "examples", "that", "most", "14-year", "olds", "can't", "answer", ",", "therefor", "the", "implementation", "instruction", "made", "in", "the", "1990-years", "needs", "to", "be", "revised", "."])
  end

  it 'tokenizes a string #002' do
  text = "Some main-categories of the mathematics-test have sub-examples that most 14-year olds can't answer, therefor the implementation-instruction made in the 1990-years needs to be revised."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- long_word_split: 4
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ long_word_split: 4
  )
  expect(pt.tokenize).to eq(["some", "main", "categories", "of", "the", "mathematics", "test", "have", "sub", "examples", "that", "most", "14", "year", "olds", "can't", "answer", ",", "therefor", "the", "implementation", "instruction", "made", "in", "the", "1990", "years", "needs", "to", "be", "revised", "."])
  end
@@ -506,136 +537,153 @@ describe PragmaticTokenizer do
  context 'option (clean)' do
  it 'tokenizes a string #001' do
  text = "Hello ---------------."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["hello", "."])
  end

  it 'tokenizes a string #002' do
  text = "Hello ____________________ ."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["hello", "."])
  end

  it 'tokenizes a string #003' do
  text = "© ABC Company 1994"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["abc", "company", "1994"])
+ expect(pt.tokenize).to eq(%w(abc company 1994))
  end

  it 'tokenizes a string #004' do
  text = "This sentence has a long string of dots ......................."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["this", "sentence", "has", "a", "long", "string", "of", "dots"])
+ expect(pt.tokenize).to eq(%w(this sentence has a long string of dots))
  end

  it 'tokenizes a string #005' do
  text = "cnn.com mentions this *funny* #hashtag used by @obama http://cnn.com/something"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["cnn.com", "mentions", "this", "funny", "#hashtag", "used", "by", "@obama", "http://cnn.com/something"])
  end

  it 'does not remove a valid hashtag' do
  text = "This #sentence has a long string of dots ......................."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["this", "#sentence", "has", "a", "long", "string", "of", "dots"])
  end

  it 'does not remove a valid mention' do
  text = "This @sentence has a long string of dots ......................."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["this", "@sentence", "has", "a", "long", "string", "of", "dots"])
  end

  it 'cleans words with symbols 1' do
  text = "something.com:article title !!wow look!!1"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["something.com", "article", "title", "wow", "look"])
  end

  it 'cleans words with symbols 2' do
  text = "something.com:article title !!wow look!!1!1!11!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["something.com", "article", "title", "wow", "look"])
  end

  it 'identifies prefixed symbols' do
  text = "look:the sky is blue"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["look", "the", "sky", "is", "blue"])
+ expect(pt.tokenize).to eq(%w(look the sky is blue))
  end

  it 'keeps numbers at the end of mentions and hashtags' do
  text = "#le1101 #artistQ21 @someone12 @someoneelse1 and @somebody1980"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["#le1101", "#artistq21", "@someone12", "@someoneelse1", "and", "@somebody1980"])
  end

  it 'cleans a prefixed weird hyphen' do
  text = [104, 105, 103, 104, 32, 173, 116, 101, 109, 112, 101, 114, 97, 116, 117, 114, 101, 32, 97, 110, 100, 32, 173, 119, 105, 110, 100].pack("U*")
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["high", "temperature", "and", "wind"])
+ expect(pt.tokenize).to eq(%w(high temperature and wind))
  end

  it 'cleans (r) and (c) and (tm)' do
  text = "the oscar® night ©companyname is a trademark™"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["the", "oscar", "night", "companyname", "is", "a", "trademark"])
+ expect(pt.tokenize).to eq(%w(the oscar night companyname is a trademark))
  end

  it 'cleans letters in boxes 1' do
  text = "making🇦🇹postcards"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
- expect(pt.tokenize).to eq(["making", "postcards"])
+ expect(pt.tokenize).to eq(%w(making postcards))
  end

  it 'removes colons' do
  text = "At 19:30 o'clock: Mad Max: Fury Road"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["at", "19:30", "o'clock", "mad", "max", "fury", "road"])
  end

  it 'removes a hyphen prefix 3' do
  text = "women's clothes and –shoes needed"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["women's", "clothes", "and", "shoes", "needed"])
  end

  it 'does not remove tokens with ampersands' do
  text = "you&me"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true
  )
  expect(pt.tokenize).to eq(["you", "&", "me"])
  end
@@ -645,8 +693,9 @@ describe PragmaticTokenizer do
  it 'tokenizes a string #001' do
  # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
  text = "I.B.M. cat's can't"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- classic_filter: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ classic_filter: true
  )
  expect(pt.tokenize).to eq(["ibm", "cat", "can't"])
  end
@@ -654,34 +703,38 @@ describe PragmaticTokenizer do
  it 'tokenizes a string #002' do
  # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
  text = "St.Veit, which usually would be written St. Veit was not visited by B.Obama reported CNN.com"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- classic_filter: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ classic_filter: true
  )
  expect(pt.tokenize).to eq(["st.veit", ",", "which", "usually", "would", "be", "written", "st", "veit", "was", "not", "visited", "by", "b.obama", "reported", "cnn.com"])
  end

  it 'optimizes the classic filter' do
  text = "therés something"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- classic_filter: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ classic_filter: true
  )
- expect(pt.tokenize).to eq(["there", "something"])
+ expect(pt.tokenize).to eq(%w(there something))
  end

  it 'optimizes the classic filter' do
  text = [116, 104, 101, 114, 101, 32, 769, 115, 32, 115, 111, 109, 101, 116, 104, 105, 110, 103].pack("U*")
- pt = PragmaticTokenizer::Tokenizer.new(text,
- classic_filter: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ classic_filter: true
  )
- expect(pt.tokenize).to eq(["there", "something"])
+ expect(pt.tokenize).to eq(%w(there something))
  end
  end

  context 'option (language)' do
  it 'tokenizes a string #001' do
  text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en'
  )
  expect(pt.tokenize).to eq(["hello", "ms.", "piggy", ",", "this", "is", "john", ".", "we", "are", "selling", "a", "new", "fridge", "for", "$5,000", ".", "that", "is", "a", "20%", "discount", "over", "the", "nev.", "retailers", ".", "it", "is", "a", "'", "must", "buy", "'", ",", "so", "don't", "hesistate", "."])
  end
@@ -697,8 +750,9 @@ describe PragmaticTokenizer do
  to the consequences for research and innovation or the public interest.\'
  Says Ms. Raines, \'[The judgement] confirms our concern that the absence of
  patent lawyers on the court could prove troublesome.\'"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en'
  )
  expect(pt.tokenize).to eq(['lisa', 'raines', ',', 'a', 'lawyer', 'and', 'director', 'of', 'government', 'relations', 'for', 'the', 'industrial', 'biotechnical', 'association', ',', 'contends', 'that', 'a', 'judge', 'well-versed', 'in', 'patent', 'law', 'and', 'the', 'concerns', 'of', 'research-based', 'industries', 'would', 'have', 'ruled', 'otherwise', '.', 'and', 'judge', 'newman', ',', 'a', 'former', 'patent', 'lawyer', ',', 'wrote', 'in', 'her', 'dissent', 'when', 'the', 'court', 'denied', 'a', 'motion', 'for', 'a', 'rehearing', 'of', 'the', 'case', 'by', 'the', 'full', 'court', ',', "\'", 'the', "panel's", 'judicial', 'legislation', 'has', 'affected', 'an', 'important', 'high-technological', 'industry', ',', 'without', 'regard', 'to', 'the', 'consequences', 'for', 'research', 'and', 'innovation', 'or', 'the', 'public', 'interest', '.', '\'', 'says', 'ms.', 'raines', ',', '\'', '[', 'the', 'judgement', ']', 'confirms', 'our', 'concern', 'that', 'the', 'absence', 'of', 'patent', 'lawyers', 'on', 'the', 'court', 'could', 'prove', 'troublesome', '.', "\'"])
  end
@@ -707,56 +761,63 @@ describe PragmaticTokenizer do
  context 'option (numbers)' do
  it 'tokenizes a string #001' do
  text = "Hello, that will be $5 dollars. You can pay at 5:00, after it is 500."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :all
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :all
  )
  expect(pt.tokenize).to eq(["hello", ",", "that", "will", "be", "$5", "dollars", ".", "you", "can", "pay", "at", "5:00", ",", "after", "it", "is", "500", "."])
  end

  it 'tokenizes a string #002' do
  text = "Hello, that will be $5 dollars. You can pay at 5:00, after it is 500."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :none
  )
  expect(pt.tokenize).to eq(["hello", ",", "that", "will", "be", "dollars", ".", "you", "can", "pay", "at", ",", "after", "it", "is", "."])
  end

  it 'tokenizes a string #003' do
  text = "2pac U2 50cent blink-182 $500 zero7 M83 B-52s 500"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :semi
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :semi
  )
  expect(pt.tokenize).to eq(["2pac", "u2", "50cent", "blink-182", "$500", "zero7", "m83", "b-52s"])
  end

  it 'tokenizes a string #004' do
  text = "2pac U2 50cent blink-182 zero7 M83 B-52s 500 Hello"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :only
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :only
  )
  expect(pt.tokenize).to eq(["2pac", "u2", "50cent", "blink-182", "zero7", "m83", "b-52s", "500"])
  end

  it 'tokenizes a string #005' do
  text = "2pac U2 50cent blink-182 $500 zero7 M83 B-52s 500"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :none
  )
  expect(pt.tokenize).to eq([])
  end

  it 'tokenizes a string #005' do
  text = "2pac U2 50cent blink-182 $500 zero7 M83 B-52s 500 number iv VI"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :none
  )
  expect(pt.tokenize).to eq(["number"])
  end

  it 'tokenizes a string #006' do
  text = "Remove III Roman Numerals and IX. with a period."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- numbers: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ numbers: :none
  )
  expect(pt.tokenize).to eq(["remove", "roman", "numerals", "and", ".", "with", "a", "period", "."])
  end
@@ -765,8 +826,9 @@ describe PragmaticTokenizer do
  context 'option (minimum_length)' do
  it 'tokenizes a string #001' do
  text = "Let's test the minimum length of fiver."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- minimum_length: 5
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ minimum_length: 5
  )
  expect(pt.tokenize).to eq(["let's", "minimum", "length", "fiver"])
  end
@@ -775,241 +837,271 @@ describe PragmaticTokenizer do
  context 'option (punctuation)' do
  it 'tokenizes a string #001' do
  text = "kath. / evang"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["kath", "evang"])
+ expect(pt.tokenize).to eq(%w(kath evang))
  end

  it 'tokenizes a string #002' do
  text = "derStandard.at › Sport"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["derstandard.at", "sport"])
  end

  it 'tokenizes a string #003' do
  text = "hello ^^"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["hello"])
  end

  it 'tokenizes a string #004' do
  text = "This hyphen – is not...or is it? ... It's a - dash... And a horizontal ellipsis…"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["this", "hyphen", "is", "not", "or", "is", "it", "it's", "a", "dash", "and", "a", "horizontal", "ellipsis"])
  end

  it 'tokenizes a string #005' do
  text = "A sentence. One with two dots.. And with three... Or horizontal ellipsis… which are three dots too."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["a", "sentence", "one", "with", "two", "dots", "and", "with", "three", "or", "horizontal", "ellipsis", "which", "are", "three", "dots", "too"])
+ expect(pt.tokenize).to eq(%w(a sentence one with two dots and with three or horizontal ellipsis which are three dots too))
  end

  it 'tokenizes a string #006' do
  text = "+++ BREAKING +++ something happened; is it interesting?"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["breaking", "something", "happened", "is", "it", "interesting"])
+ expect(pt.tokenize).to eq(%w(breaking something happened is it interesting))
  end

  it 'tokenizes a string #007' do
  text = "Some *interesting stuff* is __happening here__"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["some", "*interesting", "stuff*", "is", "__happening", "here__"])
  end

  it 'tokenizes a string #008' do
  text = "Hello; what is your: name @username **delete**"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["hello", "what", "is", "your", "name", "@username", "**delete**"])
  end

  it 'tokenizes a string #009' do
  text = "hello ;-) yes"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none
  )
- expect(pt.tokenize).to eq(["hello", "yes"])
+ expect(pt.tokenize).to eq(%w(hello yes))
  end

  it 'tokenizes a string #010' do
  text = "hello ;)"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["hello"])
  end

  it 'tokenizes a string #011' do
  text = "Hello ____________________ ."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none
  )
  expect(pt.tokenize).to eq(["hello"])
  end

  it 'handles non-domain words with a dot 1' do
  text = "They were being helped.This is solidarity."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
- )
- expect(pt.tokenize).to eq(["they", "were", "being", "helped", "this", "is", "solidarity"])
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
+ )
+ expect(pt.tokenize).to eq(%w(they were being helped this is solidarity))
  end

  it 'handles non-domain words with a dot 2' do
  text = "picture was taken in sept.2015"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["picture", "was", "taken", "in", "sept.", "2015"])
  end

  it 'handles non-domain words with a dot 3' do
  text = "They were being helped.This is solidarity. See the breaking news stories about X on cnn.com/europe and english.alarabiya.net, here’s a screenshot: https://t.co/s83k28f29d31s83"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["they", "were", "being", "helped", "this", "is", "solidarity", "see", "the", "breaking", "news", "stories", "about", "x", "on", "cnn.com", "europe", "and", "english.alarabiya.net", "here’s", "a", "screenshot", "https://t.co/s83k28f29d31s83"])
  end

  it 'handles numbers with symbols 1' do
  text = "Pittsburgh Steelers won 18:16 against Cincinnati Bengals!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["pittsburgh", "steelers", "won", "18:16", "against", "cincinnati", "bengals"])
  end

  it 'handles numbers with symbols 2' do
  text = "Pittsburgh Steelers won 18:16 against Cincinnati Bengals!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["pittsburgh", "steelers", "won", "18:16", "against", "cincinnati", "bengals"])
  end

  it 'handles apostrophes and quotes' do
  text = "“Data Visualization: How to Tell Stories with Data — Jeff Korhan” by @AINewsletter"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["data", "visualization", "how", "to", "tell", "stories", "with", "data", "jeff", "korhan", "by", "@ainewsletter"])
  end

  it 'handles mentions' do
  text = ".@someone I disagree"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["@someone", "i", "disagree"])
  end

  it 'handles old school emoticons 2' do
  text = "oooh! <3"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["oooh", "<3"])
  end

  it 'handles old school emoticons 3' do
  text = "@someone &lt;33"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["@someone", "<33"])
  end

  it 'handles words with a symbol prefix 1' do
  text = "Yes! /cc @someone"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["yes", "cc", "@someone"])
  end

  it 'handles words with a emoji suffix' do
  text = "Let's meet there.😝 ok?"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["let's", "meet", "there", "😝", "ok"])
  end

  it 'handles words with a symbol prefix 2' do
  text = "blah blah |photo by @someone"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["blah", "blah", "photo", "by", "@someone"])
  end

  it 'handles pseudo-contractions' do
  text = "I suggest to buy stocks that are low value+have momentum"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["i", "suggest", "to", "buy", "stocks", "that", "are", "low", "value", "have", "momentum"])
+ expect(pt.tokenize).to eq(%w(i suggest to buy stocks that are low value have momentum))
  end

  it 'handles apostrophes and quotes 1' do
  text = "Watch the video of @amandapalmer's song “Killing Type” here"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["watch", "the", "video", "of", "@amandapalmer's", "song", "killing", "type", "here"])
  end

- it 'handles apostrophes and quotes 2' do
+ it 'handles apostrophes and quotes 2' do
  text = "Watch the video of @amandapalmer`s song “Killing Type” here"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["watch", "the", "video", "of", "@amandapalmer`s", "song", "killing", "type", "here"])
  end

  it 'handles numbers suffixed with a symbol' do
  text = "4 Things Marketers Must Do Better in 2016: blah"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["4", "things", "marketers", "must", "do", "better", "in", "2016", "blah"])
+ expect(pt.tokenize).to eq(%w(4 things marketers must do better in 2016 blah))
  end

  it 'handles words with a emoticon suffix' do
  skip "NOT IMPLEMENTED"
  text = "look, a dog with shoes☺ !!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["look", "a", "dog", "with", "shoes", "☺"])
  end

  it 'handles emoji 1' do
  text = "How bad!😝"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["how", "bad", "😝"])
  end

  it 'handles emoji 2' do
  text = "😝How bad!"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["😝", "how", "bad"])
  end
@@ -1017,16 +1109,18 @@ describe PragmaticTokenizer do
  it 'identifies old school emoticons' do
  skip "NOT IMPLEMENTED"
  text = 'looking forward to the new kodak super8 camera \o/'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["looking", "forward", "to", "the", "new", "kodak", "super8", "camera", '\o/'])
  end

  it 'splits at hashtags' do
  text = "some sentence#RT ... i like u2.#bono"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none
  )
  expect(pt.tokenize).to eq(["some", "sentence", "#rt", "i", "like", "u2", "#bono"])
  end
@@ -1035,41 +1129,45 @@ describe PragmaticTokenizer do
  context 'option (remove_stop_words)' do
  it 'removes stop words' do
  text = 'This is a short sentence with explanations and stop words.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- remove_stop_words: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ remove_stop_words: true
  )
  expect(pt.tokenize).to eq(["short", "sentence", "explanations", "."])
  end

  it 'removes user-supplied stop words' do
  text = 'This is a short sentence with explanations and stop words.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- remove_stop_words: true,
- stop_words: ["and", "a"]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ remove_stop_words: true,
+ stop_words: %w(and a)
  )
  expect(pt.tokenize).to eq(["this", "is", "short", "sentence", "with", "explanations", "stop", "words", "."])
  end

  it 'removes user-supplied stop words and default stop words' do
  text = 'This is a short sentence with explanations and stop words.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- remove_stop_words: true,
- stop_words: ["sentence"],
- filter_languages: [:en]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ remove_stop_words: true,
+ stop_words: ["sentence"],
+ filter_languages: [:en]
  )
  expect(pt.tokenize).to eq(["short", "explanations", "."])
  end

  it 'removes user-supplied stop words and default stop words across multiple languages' do
  text = 'This is a short sentence with explanations and stop words. And achte German words.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- remove_stop_words: true,
- stop_words: ["sentence"],
- filter_languages: [:en, :de]
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ remove_stop_words: true,
+ stop_words: ["sentence"],
+ filter_languages: [:en, :de]
  )
  expect(pt.tokenize).to eq(["short", "explanations", ".", "german", "."])
  end
@@ -1078,27 +1176,30 @@ describe PragmaticTokenizer do
  context 'multiple options selected' do
  it 'tokenizes a string #001' do
  text = 'His name is Mr. Smith.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(['his', 'name', 'is', 'mr.', 'smith'])
  end

  it 'tokenizes a string #002' do
  text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- punctuation: 'only'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ punctuation: 'only'
  )
  expect(pt.tokenize).to eq([",", ".", ".", ".", "'", "'", ",", "."])
  end

  it 'tokenizes a string #003' do
  text = "Hello the a it experiment one fine."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- remove_stop_words: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ remove_stop_words: true
  )
  expect(pt.tokenize).to eq(["experiment", "fine", "."])
  end
@@ -1106,214 +1207,235 @@ describe PragmaticTokenizer do
  it 'tokenizes a string #004' do
  # https://www.ibm.com/developerworks/community/blogs/nlp/entry/tokenization?lang=en
  text = "\"I said, 'what're you? Crazy?'\" said Sandowsky. \"I can't afford to do that.\""
- pt = PragmaticTokenizer::Tokenizer.new(text,
- expand_contractions: true,
- remove_stop_words: true,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ expand_contractions: true,
+ remove_stop_words: true,
+ punctuation: 'none'
  )
- expect(pt.tokenize).to eq(["crazy", "sandowsky", "afford"])
+ expect(pt.tokenize).to eq(%w(crazy sandowsky afford))
  end

  it 'tokenizes a string #005' do
  text = "Hello world with a stop word experiment."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- clean: true,
- numbers: :none,
- minimum_length: 3,
- expand_contractions: true,
- remove_stop_words: true,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ clean: true,
+ numbers: :none,
+ minimum_length: 3,
+ expand_contractions: true,
+ remove_stop_words: true,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["experiment"])
  end

  it 'tokenizes a string #006' do
  text = "Hello; what is your: name @username **delete**"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["hello", "what", "is", "your", "name", "@username", "delete"])
  end

  it 'tokenizes a string #007' do
  text = 'His name is Mr. Smith.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- punctuation: 'none',
- downcase: false
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ punctuation: 'none',
+ downcase: false
  )
  expect(pt.tokenize).to eq(['His', 'name', 'is', 'Mr.', 'Smith'])
  end

  it 'tokenizes a string #008' do
  text = "Can't go tonight. Didn't finish."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- downcase: false,
- expand_contractions: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ downcase: false,
+ expand_contractions: true
  )
  expect(pt.tokenize).to eq(["Cannot", "go", "tonight", ".", "Did", "not", "finish", "."])
  end

  it 'tokenizes a string #009' do
  text = "Some *interesting stuff* is __happening here__"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none',
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none',
+ clean: true
  )
- expect(pt.tokenize).to eq(["some", "interesting", "stuff", "is", "happening", "here"])
+ expect(pt.tokenize).to eq(%w(some interesting stuff is happening here))
  end

  it 'also allows symbols for options' do
  text = 'His name is Mr. Smith.'
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: :en,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: :en,
+ punctuation: :none
  )
  expect(pt.tokenize).to eq(['his', 'name', 'is', 'mr.', 'smith'])
  end

  it 'handles long strings 1' do
  text = "Hello World. My name is Jonas. What is your name? My name is Jonas IV Smith. There it is! I found it. My name is Jonas E. Smith. Please turn to p. 55. Were Jane and co. at the party? They closed the deal with Pitt, Briggs & Co. at noon. Let's ask Jane and co. They should know. They closed the deal with Pitt, Briggs & Co. It closed yesterday. I can't see Mt. Fuji from here. St. Michael's Church is on 5th st. near the light. That is JFK Jr.'s book. I visited the U.S.A. last year. I live in the E.U. How about you? I live in the U.S. How about you? I work for the U.S. Government in Virginia. I have lived in the U.S. for 20 years. She has $100.00 in her bag. She has $100.00. It is in her bag. He teaches science (He previously worked for 5 years as an engineer.) at the local University. Her email is Jane.Doe@example.com. I sent her an email. The site is: https://www.example.50.com/new-site/awesome_content.html. Please check it out. She turned to him, 'This is great.' she said. She turned to him, \"This is great.\" she said. She turned to him, \"This is great.\" She held the book out to show him. Hello!! Long time no see. Hello?? Who is there? Hello!? Is that you? Hello?! Is that you? 1.) The first item 2.) The second item 1.) The first item. 2.) The second item. 1) The first item 2) The second item 1) The first item. 2) The second item. 1. The first item 2. The second item 1. The first item. 2. The second item. • 9. The first item • 10. The second item ⁃9. The first item ⁃10. The second item a. The first item b. The second item c. The third list item This is a sentence\ncut off in the middle because pdf. It was a cold \nnight in the city. features\ncontact manager\nevents, activities\n You can find it at N°. 1026.253.553. That is where the treasure is. She works at Yahoo! in the accounting department. We make a good team, you and I. Did you see Albert I. Jones yesterday? Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .” \"Bohr [...] used the analogy of parallel stairways [...]\" (Smith 55). If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence. I never meant that.... She left the store. I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it. One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds. . . . The practice was not abandoned. . . ."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- clean: true,
- minimum_length: 3,
- expand_contractions: true,
- remove_stop_words: true,
- numbers: :none,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ clean: true,
+ minimum_length: 3,
+ expand_contractions: true,
+ remove_stop_words: true,
+ numbers: :none,
+ punctuation: :none
  )
  expect(pt.tokenize).to eq(["jonas", "jonas", "smith", "jonas", "smith", "turn", "jane", "party", "closed", "deal", "pitt", "briggs", "noon", "jane", "closed", "deal", "pitt", "briggs", "closed", "yesterday", "mt.", "fuji", "st.", "michael's", "church", "st.", "light", "jfk", "jr.", "book", "visited", "u.s.a.", "year", "live", "e.u.", "live", "u.s.", "work", "u.s.", "government", "virginia", "lived", "u.s.", "years", "bag", "bag", "teaches", "science", "worked", "years", "engineer", "local", "university", "email", "jane.doe@example.com", "email", "site", "check", "turned", "great", "turned", "great", "turned", "great", "held", "book", "long", "time", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "list", "item", "sentence", "cut", "middle", "pdf", "cold", "night", "city", "features", "contact", "manager", "events", "activities", "treasure", "works", "yahoo", "accounting", "department", "good", "team", "albert", "jones", "yesterday", "thoreau", "argues", "simplifying", "one’s", "life", "laws", "universe", "complex", "bohr", "analogy", "parallel", "stairways", "smith", "left", "sentence", "omission", "ellipsis", "marks", "preceded", "space", "sentence", "period", "sentence", "meant", "left", "store", "habit", "weakened", "combining", "self-interpreting", "compounds", "practice", "abandoned"])
  end

  it 'handles long strings 2' do
  text = "Hello World. My name is Jonas. What is your name? My name is Jonas IV Smith. There it is! I found it. My name is Jonas E. Smith. Please turn to p. 55. Were Jane and co. at the party? They closed the deal with Pitt, Briggs & Co. at noon. Let's ask Jane and co. They should know. They closed the deal with Pitt, Briggs & Co. It closed yesterday. I can't see Mt. Fuji from here. St. Michael's Church is on 5th st. near the light. That is JFK Jr.'s book. I visited the U.S.A. last year. I live in the E.U. How about you? I live in the U.S. How about you? I work for the U.S. Government in Virginia. I have lived in the U.S. for 20 years. She has $100.00 in her bag. She has $100.00. It is in her bag. He teaches science (He previously worked for 5 years as an engineer.) at the local University. Her email is Jane.Doe@example.com. I sent her an email. The site is: https://www.example.50.com/new-site/awesome_content.html. Please check it out. She turned to him, 'This is great.' she said. She turned to him, \"This is great.\" she said. She turned to him, \"This is great.\" She held the book out to show him. Hello!! Long time no see. Hello?? Who is there? Hello!? Is that you? Hello?! Is that you? 1.) The first item 2.) The second item 1.) The first item. 2.) The second item. 1) The first item 2) The second item 1) The first item. 2) The second item. 1. The first item 2. The second item 1. The first item. 2. The second item. • 9. The first item • 10. The second item ⁃9. The first item ⁃10. The second item a. The first item b. The second item c. The third list item This is a sentence\ncut off in the middle because pdf. It was a cold \nnight in the city. features\ncontact manager\nevents, activities\n You can find it at N°. 1026.253.553. That is where the treasure is. She works at Yahoo! in the accounting department. We make a good team, you and I. Did you see Albert I. Jones yesterday? Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .” \"Bohr [...] used the analogy of parallel stairways [...]\" (Smith 55). If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence. I never meant that.... She left the store. I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it. One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds. . . . The practice was not abandoned. . . ." * 10
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- clean: true,
- minimum_length: 3,
- expand_contractions: true,
- remove_stop_words: true,
- numbers: :none,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ clean: true,
+ minimum_length: 3,
+ expand_contractions: true,
+ remove_stop_words: true,
+ numbers: :none,
+ punctuation: :none
  )
  expect(pt.tokenize).to eq(["jonas", "jonas", "smith", "jonas", "smith", "turn", "jane", "party", "closed", "deal", "pitt", "briggs", "noon", "jane", "closed", "deal", "pitt", "briggs", "closed", "yesterday", "mt.", "fuji", "st.", "michael's", "church", "st.", "light", "jfk", "jr.", "book", "visited", "u.s.a.", "year", "live", "e.u.", "live", "u.s.", "work", "u.s.", "government", "virginia", "lived", "u.s.", "years", "bag", "bag", "teaches", "science", "worked", "years", "engineer", "local", "university", "email", "jane.doe@example.com", "email", "site", "check", "turned", "great", "turned", "great", "turned", "great", "held", "book", "long", "time", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "item", "list", "item", "sentence", "cut", "middle", "pdf", "cold", "night", "city", "features", "contact", "manager", "events", "activities", "treasure", "works", "yahoo", "accounting", "department", "good", "team", "albert", "jones", "yesterday", "thoreau", "argues", "simplifying", "one’s", "life", "laws", "universe", "complex", "bohr", "analogy", "parallel", "stairways", "smith", "left", "sentence", "omission", "ellipsis", "marks", "preceded", "space", "sentence", "period", "sentence", "meant", "left", "store", "habit", "weakened", "combining", "self-interpreting", "compounds", "practice", "abandoned"] * 10)
  end

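The two long-string specs above combine most of the tokenizer's options; a condensed sketch of the same option set applied to a single sentence taken from that input (expected tokens per the long-string assertion above):

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new(
      "I can't see Mt. Fuji from here.",
      language:            'en',
      clean:               true,
      minimum_length:      3,
      expand_contractions: true,
      remove_stop_words:   true,
      numbers:             :none,
      punctuation:         :none
    )
    pt.tokenize
    # => ["mt.", "fuji"] (the contraction is expanded, then stop words,
    #    short tokens, numbers, and punctuation are removed)
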
  it 'handles markdown' do
  text = "This is _bold_ and this is *italic*"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none',
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none',
+ clean: true
  )
- expect(pt.tokenize).to eq(["this", "is", "bold", "and", "this", "is", "italic"])
+ expect(pt.tokenize).to eq(%w(this is bold and this is italic))
  end

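A standalone sketch of the markdown spec above, showing that punctuation: 'none' plus clean: true strips emphasis markers:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new(
      "This is _bold_ and this is *italic*",
      punctuation: 'none',
      clean:       true
    )
    pt.tokenize
    # => ["this", "is", "bold", "and", "this", "is", "italic"]
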
  it 'handles single quotes' do
  text = "Recognised as one of the ‘good’ games."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- clean: true,
- numbers: :none,
- minimum_length: 3,
- expand_contractions: true,
- remove_stop_words: true,
- punctuation: :none,
- downcase: true)
- expect(pt.tokenize).to eq(["recognised", "good", "games"])
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ clean: true,
+ numbers: :none,
+ minimum_length: 3,
+ expand_contractions: true,
+ remove_stop_words: true,
+ punctuation: :none,
+ downcase: true)
+ expect(pt.tokenize).to eq(%w(recognised good games))
  end

  it 'removes control characters' do
  text = "\u0000 \u001F \u007FHello test."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- language: 'en',
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ language: 'en',
+ clean: true
  )
  expect(pt.tokenize).to eq(["hello", "test", "."])
  end

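The same behavior outside RSpec (a sketch; output per the assertion above). Note that clean: true strips the control characters while the sentence-final period survives, since punctuation removal is not requested here:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new("\u0000 \u001F \u007FHello test.", language: 'en', clean: true)
    pt.tokenize
    # => ["hello", "test", "."]
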
  it 'splits too long words with hypens' do
  text = "hi-hat and old-school but not really-important-long-word"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none',
- long_word_split: 12
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none',
+ long_word_split: 12
  )
  expect(pt.tokenize).to eq(["hi-hat", "and", "old-school", "but", "not", "really", "important", "long", "word"])
  end

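Reading the spec above: long_word_split: 12 splits hyphenated tokens longer than 12 characters at their hyphens, while shorter compounds such as 'hi-hat' and 'old-school' stay whole. A sketch:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new(
      "hi-hat and old-school but not really-important-long-word",
      punctuation:     'none',
      long_word_split: 12
    )
    pt.tokenize
    # => ["hi-hat", "and", "old-school", "but", "not", "really", "important", "long", "word"]
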
  it 'handles hashtags 2' do
  text = "This is the #upper-#limit"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none',
- hashtags: :keep_and_clean
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none',
+ hashtags: :keep_and_clean
  )
- expect(pt.tokenize).to eq(["this", "is", "the", "upper", "limit"])
+ expect(pt.tokenize).to eq(%w(this is the upper limit))
  end

  it 'handles hashtags 3' do
  text = "The #2016-fun has just begun."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none',
- hashtags: :keep_and_clean
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none',
+ hashtags: :keep_and_clean
  )
- expect(pt.tokenize).to eq(["the", "2016", "fun", "has", "just", "begun"])
+ expect(pt.tokenize).to eq(%w(the 2016 fun has just begun))
  end

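Per the two hashtag specs above, hashtags: :keep_and_clean keeps the hashtag's content but drops the # sign and splits on internal hyphens. A sketch:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new(
      "The #2016-fun has just begun.",
      punctuation: 'none',
      hashtags:    :keep_and_clean
    )
    pt.tokenize
    # => ["the", "2016", "fun", "has", "just", "begun"]
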
  it 'does not clean mentions' do
  text = "@_someone_ because @someone and @_someone was taken"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- mentions: :keep_original,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ mentions: :keep_original,
+ clean: true
  )
  expect(pt.tokenize).to eq(["@_someone_", "because", "@someone", "and", "@_someone", "was", "taken"])
  end

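Per the spec above, mentions: :keep_original preserves @-handles verbatim, underscores included, even with clean: true enabled. A sketch:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new(
      "@_someone_ because @someone and @_someone was taken",
      mentions: :keep_original,
      clean:    true
    )
    pt.tokenize
    # => ["@_someone_", "because", "@someone", "and", "@_someone", "was", "taken"]
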
  it 'removes double single quotes' do
  text = "Strong statement in ''The Day The Earth Caught Fire'' (1961)"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none,
+ clean: true
  )
- expect(pt.tokenize).to eq(["strong", "statement", "in", "the", "day", "the", "earth", "caught", "fire", "1961"])
+ expect(pt.tokenize).to eq(%w(strong statement in the day the earth caught fire 1961))
  end

  it 'removes a hyphen prefix 1' do
  text = "Geopol.-Strategy"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none,
+ clean: true
  )
- expect(pt.tokenize).to eq(["geopol", "strategy"])
+ expect(pt.tokenize).to eq(%w(geopol strategy))
  end

  it 'removes a hyphen prefix 2' do
  text = "The language we use creates the reality we experience.-Michael Hyatt #quote"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: :none,
- clean: true
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: :none,
+ clean: true
  )
  expect(pt.tokenize).to eq(["the", "language", "we", "use", "creates", "the", "reality", "we", "experience", "michael", "hyatt", "#quote"])
  end

  it 'does not remove tokens with ampersands' do
  text = "you&amp;me"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true,
+ punctuation: :none
  )
- expect(pt.tokenize).to eq(["you", "me"])
+ expect(pt.tokenize).to eq(%w(you me))
  end

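Per the spec above, a token containing the HTML entity &amp; is not dropped wholesale; with clean: true the entity acts as a separator between the surrounding words. A sketch:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new("you&amp;me", clean: true, punctuation: :none)
    pt.tokenize
    # => ["you", "me"]
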
  it 'cleans percent signs not related to numbers' do
  text = "TudoW%1 provides company users a way to offer each other, and guests, and interpreters%6 free assistance. To date, there have been %2 questions asked."
- pt = PragmaticTokenizer::Tokenizer.new(text,
- clean: true,
- numbers: :none,
- punctuation: :none
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ clean: true,
+ numbers: :none,
+ punctuation: :none
  )
- expect(pt.tokenize).to eq(["tudow", "provides", "company", "users", "a", "way", "to", "offer", "each", "other", "and", "guests", "and", "interpreters", "free", "assistance", "to", "date", "there", "have", "been", "questions", "asked"])
+ expect(pt.tokenize).to eq(%w(tudow provides company users a way to offer each other and guests and interpreters free assistance to date there have been questions asked))
  end
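
A sketch of the percent-sign cleanup above: clean: true and numbers: :none together strip the %N markers attached to words and drop the free-standing %2, so no numeric debris survives:

    require 'pragmatic_tokenizer'

    text = "TudoW%1 provides company users a way to offer each other, and guests, and interpreters%6 free assistance. To date, there have been %2 questions asked."
    pt = PragmaticTokenizer::Tokenizer.new(text, clean: true, numbers: :none, punctuation: :none)
    pt.tokenize.first(4)
    # => ["tudow", "provides", "company", "users"] (full list per the assertion above)
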
  end
  end
@@ -1473,8 +1595,9 @@ describe PragmaticTokenizer do
  it 'handles empty tokens' do
  text = "!!!!! https://t.co/xxxx"
- pt = PragmaticTokenizer::Tokenizer.new(text,
- punctuation: 'none'
+ pt = PragmaticTokenizer::Tokenizer.new(
+ text,
+ punctuation: 'none'
  )
  expect(pt.tokenize).to eq(["https://t.co/xxxx"])
  end
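
A sketch of the empty-token guarantee above: with punctuation: 'none', a pure-punctuation run such as "!!!!!" yields no empty placeholder tokens, leaving only the URL:

    require 'pragmatic_tokenizer'

    pt = PragmaticTokenizer::Tokenizer.new("!!!!! https://t.co/xxxx", punctuation: 'none')
    pt.tokenize
    # => ["https://t.co/xxxx"]
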
@@ -1532,4 +1655,4 @@ describe PragmaticTokenizer do
  end
  end
  end
- end
+ end