greeb 0.2.2.rc1 → 0.2.2.rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 921db15cfa86d9a5e6bf0e80058d349d708d1750
4
- data.tar.gz: 994418e21e9e55ecb42b2329b59efce999281df9
3
+ metadata.gz: 043ee2da87958a027caf792058ae1e3e44cc9684
4
+ data.tar.gz: 8f4a99b26f706badb15fd9e9d5533dd162e090e8
5
5
  SHA512:
6
- metadata.gz: df570f263f22cfb2682ab39e758c6cb25a03c88d96bdcca15c79173d2dd937e28406a7baf90162882810973564d8dccb9aaf0962bb577c623ef1eceb3c6f56e4
7
- data.tar.gz: 425859becdf31d2dc68552ba2f788675098a218063e627ec5f6b46c3a2a003fe685f75f03385289f482166c01e4e02ce5e7172ba7aa0289ae28f4fdc57645f4b
6
+ metadata.gz: e950167615138975bc9873a729f2486eb506692fcdaefdd3aa828590d261da0d336e04e481c652036df892da305269a047ab076622c315bd17b3c015990dcba7
7
+ data.tar.gz: 7642fa3892694606792db842b0ea22a8ba13800b71a3e72eaff93d41ab0548f90a6881298aa290a61110941de350ef9941871e7497fda26b4d1e316e3688c50b
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Greeb
2
2
  Greeb [grʲip] is a simple yet awesome and Unicode-aware text segmentator
3
3
  that is based on regular expressions. API documentation is available at
4
- <https://dmchk.github.com/greeb>.
4
+ <http://rubydoc.info/github/dmchk/greeb/master/frames>.
5
5
 
6
6
  ## Installation
7
7
  Add this line to your application's Gemfile:
@@ -43,8 +43,8 @@ Greeb has a very convinient API that makes you happy.
43
43
  ```ruby
44
44
  pp Greeb::Tokenizer.tokenize('Hello!')
45
45
  =begin
46
- [#<struct Greeb::Entity from=0, to=5, type=:letter>,
47
- #<struct Greeb::Entity from=5, to=6, type=:punct>]
46
+ [#<struct Greeb::Span from=0, to=5, type=:letter>,
47
+ #<struct Greeb::Span from=5, to=6, type=:punct>]
48
48
  =end
49
49
  ```
50
50
 
@@ -59,34 +59,34 @@ EOF
59
59
 
60
60
  pp Greeb::Tokenizer.tokenize(text)
61
61
  =begin
62
- [#<struct Greeb::Entity from=0, to=5, type=:letter>,
63
- #<struct Greeb::Entity from=5, to=6, type=:punct>,
64
- #<struct Greeb::Entity from=6, to=7, type=:separ>,
65
- #<struct Greeb::Entity from=7, to=8, type=:letter>,
66
- #<struct Greeb::Entity from=8, to=9, type=:separ>,
67
- #<struct Greeb::Entity from=9, to=11, type=:letter>,
68
- #<struct Greeb::Entity from=11, to=12, type=:separ>,
69
- #<struct Greeb::Entity from=12, to=14, type=:integer>,
70
- #<struct Greeb::Entity from=14, to=15, type=:punct>,
71
- #<struct Greeb::Entity from=15, to=16, type=:separ>,
72
- #<struct Greeb::Entity from=16, to=18, type=:letter>,
73
- #<struct Greeb::Entity from=18, to=19, type=:separ>,
74
- #<struct Greeb::Entity from=19, to=28, type=:letter>,
75
- #<struct Greeb::Entity from=28, to=29, type=:separ>,
76
- #<struct Greeb::Entity from=29, to=35, type=:letter>,
77
- #<struct Greeb::Entity from=35, to=36, type=:separ>,
78
- #<struct Greeb::Entity from=36, to=38, type=:letter>,
79
- #<struct Greeb::Entity from=38, to=39, type=:separ>,
80
- #<struct Greeb::Entity from=39, to=44, type=:float>,
81
- #<struct Greeb::Entity from=44, to=47, type=:punct>,
82
- #<struct Greeb::Entity from=47, to=49, type=:break>,
83
- #<struct Greeb::Entity from=49, to=53, type=:letter>,
84
- #<struct Greeb::Entity from=53, to=54, type=:separ>,
85
- #<struct Greeb::Entity from=54, to=59, type=:letter>,
86
- #<struct Greeb::Entity from=59, to=60, type=:separ>,
87
- #<struct Greeb::Entity from=60, to=63, type=:letter>,
88
- #<struct Greeb::Entity from=63, to=64, type=:punct>,
89
- #<struct Greeb::Entity from=64, to=65, type=:break>]
62
+ [#<struct Greeb::Span from=0, to=5, type=:letter>,
63
+ #<struct Greeb::Span from=5, to=6, type=:punct>,
64
+ #<struct Greeb::Span from=6, to=7, type=:space>,
65
+ #<struct Greeb::Span from=7, to=8, type=:letter>,
66
+ #<struct Greeb::Span from=8, to=9, type=:space>,
67
+ #<struct Greeb::Span from=9, to=11, type=:letter>,
68
+ #<struct Greeb::Span from=11, to=12, type=:space>,
69
+ #<struct Greeb::Span from=12, to=14, type=:integer>,
70
+ #<struct Greeb::Span from=14, to=15, type=:punct>,
71
+ #<struct Greeb::Span from=15, to=16, type=:space>,
72
+ #<struct Greeb::Span from=16, to=18, type=:letter>,
73
+ #<struct Greeb::Span from=18, to=19, type=:space>,
74
+ #<struct Greeb::Span from=19, to=28, type=:letter>,
75
+ #<struct Greeb::Span from=28, to=29, type=:space>,
76
+ #<struct Greeb::Span from=29, to=35, type=:letter>,
77
+ #<struct Greeb::Span from=35, to=36, type=:space>,
78
+ #<struct Greeb::Span from=36, to=38, type=:letter>,
79
+ #<struct Greeb::Span from=38, to=39, type=:space>,
80
+ #<struct Greeb::Span from=39, to=44, type=:float>,
81
+ #<struct Greeb::Span from=44, to=47, type=:punct>,
82
+ #<struct Greeb::Span from=47, to=49, type=:break>,
83
+ #<struct Greeb::Span from=49, to=53, type=:letter>,
84
+ #<struct Greeb::Span from=53, to=54, type=:space>,
85
+ #<struct Greeb::Span from=54, to=59, type=:letter>,
86
+ #<struct Greeb::Span from=59, to=60, type=:space>,
87
+ #<struct Greeb::Span from=60, to=63, type=:letter>,
88
+ #<struct Greeb::Span from=63, to=64, type=:punct>,
89
+ #<struct Greeb::Span from=64, to=65, type=:break>]
90
90
  =end
91
91
  ```
92
92
 
@@ -99,8 +99,8 @@ text = 'Hello! How are you?'
99
99
  tokens = Greeb::Tokenizer.tokenize(text)
100
100
  pp Greeb::Segmentator.new(tokens).sentences
101
101
  =begin
102
- [#<struct Greeb::Entity from=0, to=6, type=:sentence>,
103
- #<struct Greeb::Entity from=7, to=19, type=:sentence>]
102
+ [#<struct Greeb::Span from=0, to=6, type=:sentence>,
103
+ #<struct Greeb::Span from=7, to=19, type=:sentence>]
104
104
  =end
105
105
  ```
106
106
 
@@ -113,21 +113,21 @@ tokens = Greeb::Tokenizer.tokenize(text)
113
113
  segmentator = Greeb::Segmentator.new(tokens)
114
114
  pp segmentator.extract(segmentator.sentences)
115
115
  =begin
116
- {#<struct Greeb::Entity from=0, to=6, type=:sentence>=>
117
- [#<struct Greeb::Entity from=0, to=5, type=:letter>,
118
- #<struct Greeb::Entity from=5, to=6, type=:punct>],
119
- #<struct Greeb::Entity from=7, to=19, type=:sentence>=>
120
- [#<struct Greeb::Entity from=7, to=10, type=:letter>,
121
- #<struct Greeb::Entity from=10, to=11, type=:separ>,
122
- #<struct Greeb::Entity from=11, to=14, type=:letter>,
123
- #<struct Greeb::Entity from=14, to=15, type=:separ>,
124
- #<struct Greeb::Entity from=15, to=18, type=:letter>,
125
- #<struct Greeb::Entity from=18, to=19, type=:punct>]}
116
+ {#<struct Greeb::Span from=0, to=6, type=:sentence>=>
117
+ [#<struct Greeb::Span from=0, to=5, type=:letter>,
118
+ #<struct Greeb::Span from=5, to=6, type=:punct>],
119
+ #<struct Greeb::Span from=7, to=19, type=:sentence>=>
120
+ [#<struct Greeb::Span from=7, to=10, type=:letter>,
121
+ #<struct Greeb::Span from=10, to=11, type=:space>,
122
+ #<struct Greeb::Span from=11, to=14, type=:letter>,
123
+ #<struct Greeb::Span from=14, to=15, type=:space>,
124
+ #<struct Greeb::Span from=15, to=18, type=:letter>,
125
+ #<struct Greeb::Span from=18, to=19, type=:punct>]}
126
126
  =end
127
127
  ```
128
128
 
129
129
  ### Parsing API
130
- Texts are often include some special entities such as URLs and e-mail
130
+ Texts often include some special spans such as URLs and e-mail
131
131
  addresses. Greeb can help you in these strings retrieval.
132
132
 
133
133
  #### URL and E-mail retrieval
@@ -136,12 +136,12 @@ text = 'My website is http://nlpub.ru and e-mail is example@example.com.'
136
136
 
137
137
  pp Greeb::Parser.urls(text).map { |e| [e, text[e.from...e.to]] }
138
138
  =begin
139
- [[#<struct Greeb::Entity from=14, to=29, type=:url>, "http://nlpub.ru"]]
139
+ [[#<struct Greeb::Span from=14, to=29, type=:url>, "http://nlpub.ru"]]
140
140
  =end
141
141
 
142
142
  pp Greeb::Parser.emails(text).map { |e| [e, text[e.from...e.to]] }
143
143
  =begin
144
- [[#<struct Greeb::Entity from=44, to=63, type=:email>, "example@example.com"]]
144
+ [[#<struct Greeb::Span from=44, to=63, type=:email>, "example@example.com"]]
145
145
  =end
146
146
  ```
147
147
 
@@ -153,7 +153,7 @@ text = 'Hello, G.L.H.F. everyone!'
153
153
 
154
154
  pp Greeb::Parser.abbrevs(text).map { |e| [e, text[e.from...e.to]] }
155
155
  =begin
156
- [[#<struct Greeb::Entity from=7, to=15, type=:abbrev>, "G.L.H.F."]]
156
+ [[#<struct Greeb::Span from=7, to=15, type=:abbrev>, "G.L.H.F."]]
157
157
  =end
158
158
  ```
159
159
 
@@ -161,13 +161,13 @@ The algorithm is not so accurate, but still useful in many practical
161
161
  situations.
162
162
 
163
163
  ## Tokens
164
- Greeb operates with entities, tuples of *(from, to, kind)*, where
165
- *from* is a beginning of the entity, *to* is an ending of the entity,
166
- and *kind* is a type of the entity.
164
+ Greeb operates with spans, tuples of *(from, to, kind)*, where
165
+ *from* is a beginning of the span, *to* is an ending of the span,
166
+ and *kind* is a type of the span.
167
167
 
168
- There are several entity types at the tokenization stage: `:letter`,
168
+ There are several span types at the tokenization stage: `:letter`,
169
169
  `:float`, `:integer`, `:separ`, `:punct` (for punctuation), `:spunct`
170
- (for in-sentence punctuation), and `:break`.
170
+ (for in-sentence punctuation), `:space`, and `:break`.
171
171
 
172
172
  ## Contributing
173
173
  1. Fork it;
data/bin/greeb CHANGED
@@ -8,6 +8,6 @@ require 'greeb'
8
8
 
9
9
  text = STDIN.read.tap(&:chomp!)
10
10
 
11
- Greeb[text].each do |entity|
12
- puts text[entity.from...entity.to] unless [:space, :break].include? entity.type
11
+ Greeb[text].each do |span|
12
+ puts text[span.from...span.to] unless [:space, :break].include? span.type
13
13
  end
@@ -1,45 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'greeb/version'
4
-
5
- # Greeb operates with entities, tuples of *(from, to, kind)*, where
6
- # *from* is a beginning of the entity, *to* is an ending of the entity,
7
- # and *kind* is a type of the entity.
8
- #
9
- # There are several entity types: `:letter`, `:float`, `:integer`,
10
- # `:separ` for separators, `:punct` for punctuation characters,
11
- # `:spunct` for in-sentence punctuation characters, and
12
- # `:break` for line endings.
13
- #
14
- class Greeb::Entity < Struct.new(:from, :to, :type)
15
- # @private
16
- def <=> other
17
- if (comparison = self.from <=> other.from) == 0
18
- self.to <=> other.to
19
- else
20
- comparison
21
- end
22
- end
23
- end
24
-
25
- # This runtime error appears when {Greeb::Tokenizer} or
26
- # {Greeb::Segmentator} tries to recognize unknown character.
27
- #
28
- class Greeb::UnknownEntity < RuntimeError
29
- attr_reader :text, :pos
30
-
31
- # @private
32
- def initialize(text, pos)
33
- @text, @pos = text, pos
34
- end
35
-
36
- # Generate the real error message.
37
- #
38
- def to_s
39
- 'Could not recognize character "%s" @ %d' % [text[pos], pos]
40
- end
41
- end
42
-
4
+ require 'greeb/exceptions'
5
+ require 'greeb/span'
43
6
  require 'greeb/strscan'
44
7
  require 'greeb/tokenizer'
45
8
  require 'greeb/segmentator'
@@ -13,13 +13,13 @@ module Greeb::Core
13
13
  #
14
14
  # @param text [String] input text.
15
15
  #
16
- # @return [Array<Greeb::Entity>] a set of tokens.
16
+ # @return [Array<Greeb::Span>] a set of tokens.
17
17
  #
18
- def analyze text
18
+ def analyze(text, helpers = HELPERS)
19
19
  Greeb::Tokenizer.tokenize(text).tap do |tokens|
20
- HELPERS.each do |helper|
20
+ helpers.each do |helper|
21
21
  Greeb::Parser.public_send(helper, text).each do |parsed|
22
- extract_tokens(tokens, parsed)
22
+ extract_spans(tokens, parsed)
23
23
  end
24
24
  end
25
25
  end
@@ -28,17 +28,18 @@ module Greeb::Core
28
28
  alias_method :'[]', :analyze
29
29
 
30
30
  protected
31
- # Extact tokens of the specified type from the input tokens set.
31
+ # Extract spans of the specified type from the input spans set.
32
32
  #
33
- # @param tokens [Array<Greeb::Entity>] input tokens set.
34
- # @param entity [Greeb::Entity] token to be extracted.
33
+ # @param spans [Array<Greeb::Span>] input spans set.
34
+ # @param span [Greeb::Span] span to be extracted.
35
35
  #
36
- # @return [Greeb::Entity] token to be extracted.
36
+ # @return [Greeb::Span] span to be extracted.
37
37
  #
38
- def extract_tokens(tokens, entity)
39
- from = tokens.index { |e| e.from == entity.from }
40
- to = tokens.index { |e| e.to == entity.to }
41
- tokens[from..to] = entity
38
+ def extract_spans(spans, span)
39
+ from = spans.index { |e| e.from == span.from }
40
+ to = spans.index { |e| e.to == span.to }
41
+ return unless from && to
42
+ spans[from..to] = span
42
43
  end
43
44
  end
44
45
 
@@ -0,0 +1,17 @@
1
+ # This runtime error appears when {Greeb::Tokenizer} or
2
+ # {Greeb::Segmentator} tries to recognize unknown character.
3
+ #
4
+ class Greeb::UnknownEntity < RuntimeError
5
+ attr_reader :text, :pos
6
+
7
+ # @private
8
+ def initialize(text, pos)
9
+ @text, @pos = text, pos
10
+ end
11
+
12
+ # Generate the real error message.
13
+ #
14
+ def to_s
15
+ 'Could not recognize character "%s" @ %d' % [text[pos], pos]
16
+ end
17
+ end
@@ -16,12 +16,15 @@ module Greeb::Parser
16
16
  # Another horrible pattern. Now for abbreviations.
17
17
  ABBREV = /\b((-{0,1}\p{L}\.)*|(-{0,1}\p{L}\. )*)-{0,1}\p{L}\./i
18
18
 
19
+ # This pattern matches anything that looks like HTML. Or not.
20
+ HTML = /<(.*?)>/i
21
+
19
22
  # Recognize URLs in the input text. Actually, URL is obsolete standard
20
23
  # and this code should be rewritten to use the URI concept.
21
24
  #
22
25
  # @param text [String] input text.
23
26
  #
24
- # @return [Array<Greeb::Entity>] found URLs.
27
+ # @return [Array<Greeb::Span>] found URLs.
25
28
  #
26
29
  def urls(text)
27
30
  scan(text, URL, :url)
@@ -31,7 +34,7 @@ module Greeb::Parser
31
34
  #
32
35
  # @param text [String] input text.
33
36
  #
34
- # @return [Array<Greeb::Entity>] found e-mail addresses.
37
+ # @return [Array<Greeb::Span>] found e-mail addresses.
35
38
  #
36
39
  def emails(text)
37
40
  scan(text, EMAIL, :email)
@@ -41,27 +44,37 @@ module Greeb::Parser
41
44
  #
42
45
  # @param text [String] input text.
43
46
  #
44
- # @return [Array<Greeb::Entity>] found abbreviations.
47
+ # @return [Array<Greeb::Span>] found abbreviations.
45
48
  #
46
49
  def abbrevs(text)
47
50
  scan(text, ABBREV, :abbrev)
48
51
  end
49
52
 
53
+ # Recognize HTML-alike entities in the input text.
54
+ #
55
+ # @param text [String] input text.
56
+ #
57
+ # @return [Array<Greeb::Span>] found HTML entities.
58
+ #
59
+ def html(text)
60
+ scan(text, HTML, :html)
61
+ end
62
+
50
63
  private
51
- # Implementation of regexp-based {Greeb::Entity} scanner.
64
+ # Implementation of regexp-based {Greeb::Span} scanner.
52
65
  #
53
66
  # @param text [String] input text.
54
67
  # @param regexp [Regexp] regular expression to be used.
55
- # @param type [Symbol] type field for the new {Greeb::Entity} instances.
68
+ # @param type [Symbol] type field for the new {Greeb::Span} instances.
56
69
  # @param offset [Fixnum] offset of the next match.
57
70
  #
58
- # @return [Array<Greeb::Entity>] found entities.
71
+ # @return [Array<Greeb::Span>] found entities.
59
72
  #
60
73
  def scan(text, regexp, type, offset = 0)
61
74
  Array.new.tap do |matches|
62
75
  while text and md = text.match(regexp)
63
76
  start, stop = md.offset(0)
64
- matches << Greeb::Entity.new(offset + start, offset + stop, type)
77
+ matches << Greeb::Span.new(offset + start, offset + stop, type)
65
78
  text, offset = text[stop..-1], offset + stop
66
79
  end
67
80
  end
@@ -13,7 +13,7 @@ class Greeb::Segmentator
13
13
 
14
14
  # Create a new instance of {Greeb::Segmentator}.
15
15
  #
16
- # @param tokens [Array<Greeb::Entity>] tokens from [Greeb::Tokenizer].
16
+ # @param tokens [Array<Greeb::Span>] tokens from [Greeb::Tokenizer].
17
17
  #
18
18
  def initialize(tokens)
19
19
  @tokens = tokens
@@ -21,62 +21,60 @@ class Greeb::Segmentator
21
21
 
22
22
  # Sentences memoization method.
23
23
  #
24
- # @return [Array<Greeb::Entity>] a set of sentences.
24
+ # @return [Array<Greeb::Span>] a set of sentences.
25
25
  #
26
26
  def sentences
27
- @sentences ||= detect_entities(new_sentence, [:punct])
27
+ @sentences ||= detect_spans(new_sentence, [:punct])
28
28
  end
29
29
 
30
30
  # Subsentences memoization method.
31
31
  #
32
- # @return [Array<Greeb::Entity>] a set of subsentences.
32
+ # @return [Array<Greeb::Span>] a set of subsentences.
33
33
  #
34
34
  def subsentences
35
- @subsentences ||= detect_entities(new_subsentence, [:punct, :spunct])
35
+ @subsentences ||= detect_spans(new_subsentence, [:punct, :spunct])
36
36
  end
37
37
 
38
38
  # Extract tokens from the set of sentences.
39
39
  #
40
- # @param sentences [Array<Greeb::Entity>] a list of sentences.
40
+ # @param sentences [Array<Greeb::Span>] a list of sentences.
41
41
  #
42
- # @return [Hash<Greeb::Entity, Array<Greeb::Entity>>] a hash with
42
+ # @return [Array<Greeb::Span, Array<Greeb::Span>>] a list of pairs with
43
43
  # sentences as keys and tokens arrays as values.
44
44
  #
45
45
  def extract(sentences, collection = tokens)
46
- Hash[
47
- sentences.map do |s|
48
- [s, collection.select { |t| t.from >= s.from and t.to <= s.to }]
49
- end
50
- ]
46
+ sentences.map do |s|
47
+ [s, collection.select { |t| t.from >= s.from and t.to <= s.to }]
48
+ end
51
49
  end
52
50
 
53
51
  protected
54
- # Implementation of the entity detection method.
52
+ # Implementation of the span detection method.
55
53
  #
56
- # @param sample [Greeb::Entity] a sample of entity to be cloned in the
54
+ # @param sample [Greeb::Span] a sample of span to be cloned in the
57
55
  # process.
58
56
  # @param stop_marks [Array<Symbol>] an array that stores the
59
- # correspondent stop marks of the necessary entities.
57
+ # correspondent stop marks of the necessary spans.
60
58
  #
61
- # @return [Array<Greeb::Entity>] a set of entites.
59
+ # @return [Array<Greeb::Span>] a set of spans.
62
60
  #
63
- def detect_entities(sample, stop_marks)
61
+ def detect_spans(sample, stop_marks)
64
62
  collection = []
65
63
 
66
- rest = tokens.inject(sample.dup) do |entity, token|
67
- next entity if sentence_aint_start? entity, token
68
- entity.from = token.from unless entity.from
69
- next entity if entity.to and entity.to > token.to
64
+ rest = tokens.inject(sample.dup) do |span, token|
65
+ next span if sentence_aint_start? span, token
66
+ span.from = token.from unless span.from
67
+ next span if span.to and span.to > token.to
70
68
 
71
69
  if stop_marks.include? token.type
72
- entity.to = find_forward(tokens, token).to
73
- collection << entity
74
- entity = sample.dup
70
+ span.to = find_forward(tokens, token).to
71
+ collection << span
72
+ span = sample.dup
75
73
  elsif ![:separ, :space].include? token.type
76
- entity.to = token.to
74
+ span.to = token.to
77
75
  end
78
76
 
79
- entity
77
+ span
80
78
  end
81
79
 
82
80
  if rest.from && rest.to
@@ -88,42 +86,42 @@ class Greeb::Segmentator
88
86
 
89
87
  private
90
88
  # Check the possibility of starting a new sentence by the specified
91
- # pair of entity and token.
89
+ # pair of span and token.
92
90
  #
93
- # @param entity [Greeb::Entity] an entity to be checked.
94
- # @param token [Greeb::Entity] an token to be checked.
91
+ # @param span [Greeb::Span] a span to be checked.
92
+ # @param token [Greeb::Span] a token to be checked.
95
93
  #
96
94
  # @return true or false.
97
95
  #
98
- def sentence_aint_start?(entity, token)
99
- !entity.from and SENTENCE_AINT_START.include? token.type
96
+ def sentence_aint_start?(span, token)
97
+ !span.from and SENTENCE_AINT_START.include? token.type
100
98
  end
101
99
 
102
100
  # Find a forwarding token that has another type.
103
101
  #
104
- # @param collection [Array<Greeb::Entity>] array of possible tokens.
105
- # @param sample [Greeb::Entity] a token that is treated as a sample.
102
+ # @param collection [Array<Greeb::Span>] array of possible tokens.
103
+ # @param sample [Greeb::Span] a token that is treated as a sample.
106
104
  #
107
- # @return [Greeb::Entity] a forwarding token.
105
+ # @return [Greeb::Span] a forwarding token.
108
106
  #
109
107
  def find_forward(collection, sample)
110
108
  collection.select { |t| t.from >= sample.from }.
111
109
  inject(sample) { |r, t| t.type == sample.type ? t : (break r) }
112
110
  end
113
111
 
114
- # Create a new instance of {Greeb::Entity} with `:sentence` type.
112
+ # Create a new instance of {Greeb::Span} with `:sentence` type.
115
113
  #
116
- # @return [Greeb::Entity] a new entity instance.
114
+ # @return [Greeb::Span] a new span instance.
117
115
  #
118
116
  def new_sentence
119
- Greeb::Entity.new(nil, nil, :sentence)
117
+ Greeb::Span.new(nil, nil, :sentence)
120
118
  end
121
119
 
122
- # Create a new instance of {Greeb::Entity} with `:subsentence` type.
120
+ # Create a new instance of {Greeb::Span} with `:subsentence` type.
123
121
  #
124
- # @return [Greeb::Entity] a new entity instance.
122
+ # @return [Greeb::Span] a new span instance.
125
123
  #
126
124
  def new_subsentence
127
- Greeb::Entity.new(nil, nil, :subsentence)
125
+ Greeb::Span.new(nil, nil, :subsentence)
128
126
  end
129
127
  end