greeb 0.1.0.rc6 → 0.1.0.rc7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
- # Greeb's tokenization facilities. Use 'em with love.
3
+ # Greeb's tokenization facilities. Use 'em with love.
4
+ #
5
+ # Unicode character categories have been obtained from
6
+ # <http://www.fileformat.info/info/unicode/category/index.htm>.
4
7
  #
5
8
  class Greeb::Tokenizer
6
9
  # This runtime error appears when {Greeb::Tokenizer} tries to recognize
@@ -43,12 +46,16 @@ class Greeb::Tokenizer
43
46
 
44
47
  # In-subsentence separator (e.g. "*" or "=").
45
48
  #
46
- SEPARATORS = /[ \p{Sm}\p{Pc}\p{Po}\p{Pd}]+/u
49
+ SEPARATORS = /[ \p{Nl}\p{No}\p{Pd}\p{Pc}\p{Po}\p{Sm}\p{So}\p{Sc}\p{Z}]+/u
47
50
 
48
51
  # Line breaks.
49
52
  #
50
53
  BREAKS = /(\r\n|\n|\r)+/u
51
54
 
55
+ # Residuals.
56
+ #
57
+ RESIDUALS = /[\p{C}\p{M}\p{Sk}]+/u
58
+
52
59
  attr_reader :text, :scanner
53
60
  protected :scanner
54
61
 
@@ -86,6 +93,7 @@ class Greeb::Tokenizer
86
93
  split_parse! PUNCTUATIONS, :punct or
87
94
  split_parse! SEPARATORS, :separ or
88
95
  split_parse! BREAKS, :break or
96
+ parse! RESIDUALS, :residual or
89
97
  raise UnknownEntity.new(text, scanner.char_pos)
90
98
  end
91
99
  ensure
data/lib/greeb/version.rb CHANGED
@@ -5,5 +5,5 @@
5
5
  module Greeb
6
6
  # Version of Greeb.
7
7
  #
8
- VERSION = '0.1.0.rc6'
8
+ VERSION = '0.1.0.rc7'
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: greeb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.rc6
4
+ version: 0.1.0.rc7
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-09 00:00:00.000000000 Z
12
+ date: 2012-12-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -109,7 +109,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
109
109
  requirements:
110
110
  - - ! '>='
111
111
  - !ruby/object:Gem::Version
112
- hash: 2757695902770698935
112
+ hash: 2716089438708653231
113
113
  version: '0'
114
114
  segments:
115
115
  - 0