pragmatic_tokenizer 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d89939b4aa11fc9918019838c4877e3c2ed4d4f
4
- data.tar.gz: 012586a964ee04adb2762d7152853ba7d56e7cd4
3
+ metadata.gz: 9d9af93943342474b5d71cd2f57a08e1f6bbc51d
4
+ data.tar.gz: efcbcfd9f0c49dd74b6cd121780c45d3fe54dc57
5
5
  SHA512:
6
- metadata.gz: 0fe3e08af8e3a4b3fb1f8ddc69b743904d8949af2ce26d1388e4e10376d0c906b30a1e6434ede092078096f77b2cbc9dbca5292c1fe95fb1f95d8533da10ea49
7
- data.tar.gz: b96e03429866b9cc26e4c2227535650d967748ffc686a1c0d83959f65f105f98b36a274a4b3b66493f55128640bdcad3ed9cd19c56076cf40da3ee7f8fd007a7
6
+ metadata.gz: 13821d3deb7385c7b8ece7fe92e296a69318b0d8bf00a7284a0d52c0b38dcc2ecfa094bb90c21a04da2782adca30c726493f03ff5aa8f54c490897fb85e83e90
7
+ data.tar.gz: 871b3bf53dacc6ca2e7a9beaa62df5bb26101bb10ed1a84fe99acc6a8a8d7db33a01814982d0a60bdfd509f6eaac28b1ea358b499f833afa242dcd6f468c9cf6
@@ -9,9 +9,9 @@ module PragmaticTokenizer
9
9
  ABBREVIATIONS = [].freeze
10
10
  STOP_WORDS = [].freeze
11
11
  CONTRACTIONS = {}.freeze
12
- EMOJI_REGEX = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}\u{1F914}\u{1F644}]/
13
- PREFIX_EMOJI_REGEX = /(?<=\S)(?=[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}\u{1F914}\u{1F644}])/
14
- POSTFIX_EMOJI_REGEX = /(?<=[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}\u{1F914}\u{1F644}])(?=\S)/
12
+ EMOJI_REGEX = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}\u{1F914}\u{1F644}]/
13
+ PREFIX_EMOJI_REGEX = /(?<=\S)(?=#{EMOJI_REGEX})/
14
+ POSTFIX_EMOJI_REGEX = /(?<=#{EMOJI_REGEX})(?=\S)/
15
15
  EMOTICON_REGEX = /(?::|;|=)(?:-)?(?:\)|D|P)/
16
16
 
17
17
  class SingleQuotes
@@ -3,7 +3,7 @@ module PragmaticTokenizer
3
3
  module Romanian
4
4
  include Languages::Common
5
5
  ABBREVIATIONS = [].freeze
6
- STOP_WORDS = ["acea", "aceasta", "această", "aceea", "acei", "aceia", "acel", "acela", "acele", "acelea", "acest", "acesta", "aceste", "acestea", "aceşti", "aceştia", "acolo", "acord", "acum", "ai", "aia", "aibă", "aici", "al", "ăla", "ale", "alea", "ălea", "altceva", "altcineva", "am", "ar", "are", "aş", "aşadar", "asemenea", "asta", "ăsta", "astăzi", "astea", "ăstea", "ăştia", "asupra", "aţi", "au", "avea", "avem", "aveţi", "azi", "bine", "bucur", "bună", "ca", "că", "căci", "când", "care", "cărei", "căror", "cărui", "cât", "câte", "câţi", "către", "câtva", "caut", "ce", "cel", "ceva", "chiar", "cinci", "cînd", "cine", "cineva", "cît", "cîte", "cîţi", "cîtva", "contra", "cu", "cum", "cumva", "curând", "curînd", "da", "dă", "dacă", "dar", "dată", "datorită", "dau", "de", "deci", "deja", "deoarece", "departe", "deşi", "din", "dinaintea", "dintr-", "dintre", "doi", "doilea", "două", "drept", "după", "ea", "ei", "el", "ele", "eram", "este", "eşti", "eu", "face", "fără", "fata", "fi", "fie", "fiecare", "fii", "fim", "fiţi", "fiu", "frumos", "graţie", "halbă", "iar", "ieri", "îi", "îl", "îmi", "împotriva", "în", "înainte", "înaintea", "încât", "încît", "încotro", "între", "întrucât", "întrucît", "îţi", "la", "lângă", "le", "li", "lîngă", "lor", "lui", "mă", "mai", "mâine", "mea", "mei", "mele", "mereu", "meu", "mi", "mie", "mîine", "mine", "mult", "multă", "mulţi", "mulţumesc", "ne", "nevoie", "nicăieri", "nici", "nimeni", "nimeri", "nimic", "nişte", "noastră", "noastre", "noi", "noroc", "noştri", "nostru", "nouă", "nu", "opt", "ori", "oricând", "oricare", "oricât", "orice", "oricînd", "oricine", "oricît", "oricum", "oriunde", "până", "patra", "patru", "patrulea", "pe", "pentru", "peste", "pic", "pînă", "poate", "pot", "prea", "prima", "primul", "prin", "puţin", "puţina", "puţină", "rog", "sa", "să", "săi", "sale", "şapte", "şase", "sau", "său", "se", "şi", "sînt", "sîntem", "sînteţi", "spate", "spre", "ştiu", "sub", "sunt", "suntem", "sunteţi", "sută", "ta", "tăi", "tale", "tău", "te", "ţi", "ţie", "timp", "tine", "toată", "toate", "tot", "toţi", "totuşi", "trei", "treia", "treilea", "tu", "un", "una", "unde", "undeva", "unei", "uneia", "unele", "uneori", "unii", "unor", "unora", "unu", "unui", "unuia", "unul", "vă", "vi", "voastră", "voastre", "voi", "voştri", "vostru", "vouă", "vreme", "vreo", "vreun", "zece", "zero", "zi", "zice"].freeze
6
+ STOP_WORDS = ["acea", "aceasta", "această", "aceea", "acei", "aceia", "acel", "acela", "acele", "acelea", "acest", "acesta", "aceste", "acestea", "aceşti", "aceştia", "acolo", "acord", "acum", "ai", "aia", "aibă", "aici", "al", "ăla", "ale", "alea", "ălea", "altceva", "altcineva", "am", "ar", "are", "aş", "aşadar", "asemenea", "asta", "ăsta", "astăzi", "astea", "ăstea", "ăştia", "asupra", "aţi", "au", "avea", "avem", "aveţi", "azi", "bine", "bucur", "bună", "ca", "că", "căci", "când", "care", "cărei", "căror", "cărui", "cât", "câte", "câţi", "către", "câtva", "caut", "ce", "cel", "ceva", "chiar", "cinci", "cînd", "cine", "cineva", "cît", "cîte", "cîţi", "cîtva", "contra", "cu", "cum", "cumva", "curând", "curînd", "da", "dă", "dacă", "dar", "dată", "datorită", "dau", "de", "deci", "deja", "deoarece", "departe", "deşi", "din", "dinaintea", "dintr-", "dintre", "doi", "doilea", "două", "drept", "după", "ea", "ei", "el", "ele", "eram", "este", "eşti", "eu", "face", "fără", "fata", "fi", "fie", "fiecare", "fii", "fim", "fiţi", "fiu", "frumos", "graţie", "halbă", "iar", "ieri", "îi", "îl", "îmi", "împotriva", "în", "înainte", "înaintea", "încât", "încît", "încotro", "între", "întrucât", "întrucît", "îţi", "la", "lângă", "le", "li", "lîngă", "lor", "lui", "mă", "mai", "mâine", "mea", "mei", "mele", "mereu", "meu", "mi", "mie", "mîine", "mine", "mult", "multă", "mulţi", "mulţumesc", "ne", "nevoie", "nicăieri", "nici", "nimeni", "nimeri", "nimic", "nişte", "noastră", "noastre", "noi", "noroc", "noştri", "nostru", "nouă", "nu", "opt", "ori", "oricând", "oricare", "oricât", "orice", "oricînd", "oricine", "oricît", "oricum", "oriunde", "până", "patra", "patru", "patrulea", "pe", "pentru", "peste", "pic", "pînă", "poate", "pot", "prea", "prima", "primul", "prin", "puţin", "puţina", "puţină", "rog", "sa", "să", "săi", "sale", "şapte", "şase", "sau", "său", "se", "şi", "sînt", "sîntem", "sînteţi", "spate", "spre", "ştiu", "sub", "sunt", "suntem", "sunteţi", "sută", "ta", "tăi", "tale", "tău", "te", "ţi", "ţie", "timp", "tine", "toată", "toate", "tot", "toţi", "totuşi", "trei", "treia", "treilea", "tu", "un", "una", "unde", "undeva", "unei", "uneia", "unele", "uneori", "unii", "unor", "unora", "unu", "unui", "unuia", "unul", "vă", "vi", "voastră", "voastre", "voi", "vor", "voştri", "vostru", "vouă", "vreme", "vreo", "vreun", "zece", "zero", "zi", "zice"].freeze
7
7
  CONTRACTIONS = {}.freeze
8
8
  end
9
9
  end
@@ -34,6 +34,9 @@ module PragmaticTokenizer
34
34
  t !~ /(\s+|\A)[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,6}(:[0-9]{1,5})?(\/.*)?/ix &&
35
35
  t !~ /\S+(@|@)\S+/ &&
36
36
  abbreviations.include?(Unicode::downcase(t.split(".")[0] == nil ? '' : t.split(".")[0])) ? t.gsub(/\./, '\1. \2').split(' ').flatten : t }
37
+ .flat_map { |t| t =~ /\u{2744}\u{FE0F}/ ? t.gsub(/\u{2744}\u{FE0F}/, " \u{2744}\u{FE0F} ").split(' ').flatten : t }
38
+ .flat_map { |t| t =~ /\u{2744}\u{FE0E}/ ? t.gsub(/\u{2744}\u{FE0E}/, " \u{2744}\u{FE0E} ").split(' ').flatten : t }
39
+ .flat_map { |t| t =~ /(\A|\S)\u{2744}[^\u{FE0E}|\u{FE0F}]/ ? t.gsub(/\u{2744}/, " \u{2744} ").split(' ').flatten : t }
37
40
  .flat_map { |t| t =~ PragmaticTokenizer::Languages::Common::PREFIX_EMOJI_REGEX ? t.gsub(PragmaticTokenizer::Languages::Common::PREFIX_EMOJI_REGEX, '\1 \2').split(' ').flatten : t }
38
41
  .flat_map { |t| t =~ PragmaticTokenizer::Languages::Common::POSTFIX_EMOJI_REGEX ? t.gsub(PragmaticTokenizer::Languages::Common::POSTFIX_EMOJI_REGEX, '\1 \2').split(' ').flatten : t }
39
42
  ).separate
@@ -10,7 +10,10 @@ module PragmaticTokenizer
10
10
  shift_multiple_dash(text)
11
11
  shift_upsidedown_question_mark(text)
12
12
  shift_upsidedown_exclamation(text)
13
+ shift_exclamation(text)
13
14
  shift_ellipse(text)
15
+ shift_no_space_mention(text)
16
+ shift_not_equals(text)
14
17
  shift_special_quotes(text)
15
18
  shift_colon(text)
16
19
  shift_bracket(text)
@@ -45,12 +48,24 @@ module PragmaticTokenizer
45
48
  text.gsub!(/¡/, ' ¡ ') || text
46
49
  end
47
50
 
51
+ def shift_exclamation(text)
52
+ text.gsub!(/(?<=[a-zA-z])!(?=[a-zA-z])/, ' ! ') || text
53
+ end
54
+
48
55
  def shift_ellipse(text)
49
56
  text.gsub!(/(\.\.\.+)/o) { ' ' + $1 + ' ' } || text
50
57
  text.gsub!(/(\.\.+)/o) { ' ' + $1 + ' ' } || text
51
58
  text.gsub!(/(…+)/o) { ' ' + $1 + ' ' } || text
52
59
  end
53
60
 
61
+ def shift_no_space_mention(text)
62
+ text.gsub!(/\.(?=(@|@)[^\.]+(\s|\z))/, '. ') || text
63
+ end
64
+
65
+ def shift_not_equals(text)
66
+ text.gsub!(/≠/, ' ≠ ') || text
67
+ end
68
+
54
69
  def shift_special_quotes(text)
55
70
  text.gsub!(/«/, ' « ') || text
56
71
  text.gsub!(/»/, ' » ') || text
@@ -162,9 +162,10 @@ module PragmaticTokenizer
162
162
  .map { |t| t.gsub(/[[:cntrl:]]/, '') }
163
163
  .map { |t| t.gsub(/(?<=\A)\:(?=.+)/, '') }
164
164
  .map { |t| t.gsub(/(?<=\A)!+(?=.+)/, '') }
165
- .map { |t| t.gsub(/1+(?=\z)/, '') }
165
+ .map { |t| t !~ /[@@#|#]/ ? t.gsub(/(?<=\D)1+(?=\z)/, '') : t }
166
166
  .map { |t| t.gsub(/!+(?=\z)/, '') }
167
167
  .map { |t| t.gsub(/!+(1*!*)*(?=\z)/, '') }
168
+ .map { |t| t.gsub(/\u{00AD}/, '') }
168
169
  .delete_if { |t| t =~ /\A-+\z/ ||
169
170
  PragmaticTokenizer::Languages::Common::SPECIAL_CHARACTERS.include?(t) ||
170
171
  t =~ /\A\.{2,}\z/ || t.include?("\\") ||
@@ -213,7 +214,11 @@ module PragmaticTokenizer
213
214
  end
214
215
 
215
216
  def remove_emoji!
216
- @tokens.delete_if { |t| t =~ PragmaticTokenizer::Languages::Common::EMOJI_REGEX }
217
+ @tokens.delete_if { |t| t =~ PragmaticTokenizer::Languages::Common::EMOJI_REGEX ||
218
+ t =~ /\u{2744}\u{FE0F}/ ||
219
+ t =~ /\u{2744}\u{FE0E}/ ||
220
+ t =~ /\u{2744}/
221
+ }
217
222
  end
218
223
 
219
224
  def remove_emails!
@@ -1,3 +1,3 @@
1
1
  module PragmaticTokenizer
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias