pragmatic_tokenizer 1.1.0 → 1.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d9af93943342474b5d71cd2f57a08e1f6bbc51d
4
- data.tar.gz: efcbcfd9f0c49dd74b6cd121780c45d3fe54dc57
3
+ metadata.gz: 9defa80663e9e5955967ba95d1bd346b5e06cca3
4
+ data.tar.gz: 5f3ec1a9392c2664b3ce8a26a7de9ba03e3a22e1
5
5
  SHA512:
6
- metadata.gz: 13821d3deb7385c7b8ece7fe92e296a69318b0d8bf00a7284a0d52c0b38dcc2ecfa094bb90c21a04da2782adca30c726493f03ff5aa8f54c490897fb85e83e90
7
- data.tar.gz: 871b3bf53dacc6ca2e7a9beaa62df5bb26101bb10ed1a84fe99acc6a8a8d7db33a01814982d0a60bdfd509f6eaac28b1ea358b499f833afa242dcd6f468c9cf6
6
+ metadata.gz: ddac620c5e335ee8ec3daa1c78ed96d9637e37d57dbb34886a153d7f292bad173ca823c291c44fc1a6f047276049d2e96562ab0a477c5cc58d6f0385636b3ba3
7
+ data.tar.gz: 9361da2cada5ab2e301601f4ddd50bde3fe7935f1b88447a3c5356b72063357a319471b4f1405f2c7aec3fa07f8c794f7a3aadc24a0e82919d2b829411725505
@@ -23,6 +23,7 @@ module PragmaticTokenizer
23
23
  shift_vertical_bar(text)
24
24
  convert_dbl_quotes(text)
25
25
  convert_sgl_quotes(text)
26
+ convert_apostrophe_s(text)
26
27
  shift_beginning_hyphen(text)
27
28
  shift_ending_hyphen(text)
28
29
  text.squeeze(' ')
@@ -120,6 +121,11 @@ module PragmaticTokenizer
120
121
  end
121
122
  end
122
123
 
124
+ def convert_apostrophe_s(text)
125
+ puts "Text: #{text.include?("\u{0301}")}"
126
+ text.gsub!(/\s\u{0301}(?=s(\s|\z))/, PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP['`']) || text
127
+ end
128
+
123
129
  def shift_beginning_hyphen(text)
124
130
  text.gsub!(/\s+-/, ' - ') || text
125
131
  end
@@ -122,7 +122,9 @@ module PragmaticTokenizer
122
122
  private
123
123
 
124
124
  def post_process(text)
125
+ puts "Text: #{text}"
125
126
  @tokens = PostProcessor.new(text: text, abbreviations: abbreviations).post_process
127
+ puts "Tokens: #{@tokens}"
126
128
  downcase! if downcase
127
129
  expand_contractions!(contractions) if expand_contractions
128
130
  clean! if clean
@@ -1,3 +1,3 @@
1
1
  module PragmaticTokenizer
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias