pragmatic_tokenizer 0.3.4 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pragmatic_tokenizer/tokenizer.rb +2 -1
- data/lib/pragmatic_tokenizer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f8b5c190992003a3b48fc266d801c818d02d928
|
4
|
+
data.tar.gz: d8782feae2c911fc4fb8495f04aff32bd95ac45a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28dcbe0dcfddb462fd2273bbb41d3e3a9c89e6ed74aeafda8e2776e5ac7ba6b4d94d958b6c38f7738779e7ffc99568fdb46df9b7c664dbef4fb750e427d96af4
|
7
|
+
data.tar.gz: 8c3954220a4f35efbf883bf82338e1e67b4f1eb2fe574c7f9f3c825a46e8ef542d552172da3c09224360903673660d5cbd5282611475510dbf5a58a72c74af66
|
@@ -25,7 +25,8 @@ module PragmaticTokenizer
|
|
25
25
|
# Punctuation 'only': Removes everything except punctuation. The
|
26
26
|
# returned result is an array of only the punctuation.
|
27
27
|
end
|
28
|
-
|
28
|
+
raise "In Pragmatic Tokenizer text must be a String" unless text.class == String
|
29
|
+
@text = CGI.unescapeHTML(text)
|
29
30
|
@language = language.to_s
|
30
31
|
@language_module = Languages.get_language_by_code(language.to_s)
|
31
32
|
@punctuation = punctuation.to_s
|