pragmatic_tokenizer 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d9af93943342474b5d71cd2f57a08e1f6bbc51d
4
- data.tar.gz: efcbcfd9f0c49dd74b6cd121780c45d3fe54dc57
3
+ metadata.gz: 9defa80663e9e5955967ba95d1bd346b5e06cca3
4
+ data.tar.gz: 5f3ec1a9392c2664b3ce8a26a7de9ba03e3a22e1
5
5
  SHA512:
6
- metadata.gz: 13821d3deb7385c7b8ece7fe92e296a69318b0d8bf00a7284a0d52c0b38dcc2ecfa094bb90c21a04da2782adca30c726493f03ff5aa8f54c490897fb85e83e90
7
- data.tar.gz: 871b3bf53dacc6ca2e7a9beaa62df5bb26101bb10ed1a84fe99acc6a8a8d7db33a01814982d0a60bdfd509f6eaac28b1ea358b499f833afa242dcd6f468c9cf6
6
+ metadata.gz: ddac620c5e335ee8ec3daa1c78ed96d9637e37d57dbb34886a153d7f292bad173ca823c291c44fc1a6f047276049d2e96562ab0a477c5cc58d6f0385636b3ba3
7
+ data.tar.gz: 9361da2cada5ab2e301601f4ddd50bde3fe7935f1b88447a3c5356b72063357a319471b4f1405f2c7aec3fa07f8c794f7a3aadc24a0e82919d2b829411725505
@@ -23,6 +23,7 @@ module PragmaticTokenizer
23
23
  shift_vertical_bar(text)
24
24
  convert_dbl_quotes(text)
25
25
  convert_sgl_quotes(text)
26
+ convert_apostrophe_s(text)
26
27
  shift_beginning_hyphen(text)
27
28
  shift_ending_hyphen(text)
28
29
  text.squeeze(' ')
@@ -120,6 +121,11 @@ module PragmaticTokenizer
120
121
  end
121
122
  end
122
123
 
124
+ def convert_apostrophe_s(text)
125
+ puts "Text: #{text.include?("\u{0301}")}"
126
+ text.gsub!(/\s\u{0301}(?=s(\s|\z))/, PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP['`']) || text
127
+ end
128
+
123
129
  def shift_beginning_hyphen(text)
124
130
  text.gsub!(/\s+-/, ' - ') || text
125
131
  end
@@ -122,7 +122,9 @@ module PragmaticTokenizer
122
122
  private
123
123
 
124
124
  def post_process(text)
125
+ puts "Text: #{text}"
125
126
  @tokens = PostProcessor.new(text: text, abbreviations: abbreviations).post_process
127
+ puts "Tokens: #{@tokens}"
126
128
  downcase! if downcase
127
129
  expand_contractions!(contractions) if expand_contractions
128
130
  clean! if clean
@@ -1,3 +1,3 @@
1
1
  module PragmaticTokenizer
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias