greeb 0.1.0.rc6 → 0.1.0.rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
- # Greeb's tokenization facilities. Use 'em with love.
3
+ # Greeb's tokenization facilities. Use 'em with love.
4
+ #
5
+ # Unicode character categories have been obtained from
6
+ # <http://www.fileformat.info/info/unicode/category/index.htm>.
4
7
  #
5
8
  class Greeb::Tokenizer
6
9
  # This runtime error appears when {Greeb::Tokenizer} tries to recognize
@@ -43,12 +46,16 @@ class Greeb::Tokenizer
43
46
 
44
47
  # In-subsentence separator (e.g. "*" or "=").
45
48
  #
46
- SEPARATORS = /[ \p{Sm}\p{Pc}\p{Po}\p{Pd}]+/u
49
+ SEPARATORS = /[ \p{Nl}\p{No}\p{Pd}\p{Pc}\p{Po}\p{Sm}\p{So}\p{Sc}\p{Z}]+/u
47
50
 
48
51
  # Line breaks.
49
52
  #
50
53
  BREAKS = /(\r\n|\n|\r)+/u
51
54
 
55
+ # Residuals.
56
+ #
57
+ RESIDUALS = /[\p{C}\p{M}\p{Sk}]+/u
58
+
52
59
  attr_reader :text, :scanner
53
60
  protected :scanner
54
61
 
@@ -86,6 +93,7 @@ class Greeb::Tokenizer
86
93
  split_parse! PUNCTUATIONS, :punct or
87
94
  split_parse! SEPARATORS, :separ or
88
95
  split_parse! BREAKS, :break or
96
+ parse! RESIDUALS, :residual or
89
97
  raise UnknownEntity.new(text, scanner.char_pos)
90
98
  end
91
99
  ensure
data/lib/greeb/version.rb CHANGED
@@ -5,5 +5,5 @@
5
5
  module Greeb
6
6
  # Version of Greeb.
7
7
  #
8
- VERSION = '0.1.0.rc6'
8
+ VERSION = '0.1.0.rc7'
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: greeb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.rc6
4
+ version: 0.1.0.rc7
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-09 00:00:00.000000000 Z
12
+ date: 2012-12-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -109,7 +109,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
109
109
  requirements:
110
110
  - - ! '>='
111
111
  - !ruby/object:Gem::Version
112
- hash: 2757695902770698935
112
+ hash: 2716089438708653231
113
113
  version: '0'
114
114
  segments:
115
115
  - 0