textoken 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textoken/options/exclude.rb +3 -16
- data/lib/textoken/options/exclude_regexp.rb +7 -0
- data/lib/textoken/options/less_than.rb +5 -19
- data/lib/textoken/options/modules/conditional_option.rb +24 -0
- data/lib/textoken/options/modules/numeric_option.rb +29 -0
- data/lib/textoken/options/modules/regexp_option.rb +11 -0
- data/lib/textoken/options/more_than.rb +5 -19
- data/lib/textoken/options/only.rb +3 -16
- data/lib/textoken/options/only_regexp.rb +7 -0
- data/lib/textoken/version.rb +1 -1
- data/lib/textoken.rb +5 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80a781929cd70c8b3dc465e584c52e03a0a741ae
|
4
|
+
data.tar.gz: dddb77f4f78bef3e14fb337ffddc8552791e5375
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a710083d4674593e17d811413d3439785a206d8c38ef69c6b4b30ea03db1d63e03a97590b3bd00e2f07181c748f0f4592c1ec9d705365f84d7b8665f3f46b571
|
7
|
+
data.tar.gz: 9c93000700578a98cf0348afe3356622b8e740d7f97511e55710d3cf71b3a4e642628969f1391ded74f64714d2717d9492d2a493ae6568fc7f13748ff2002ea5
|
@@ -2,27 +2,14 @@ module Textoken
|
|
2
2
|
# This option object excludes words in text via matching regexp
|
3
3
|
# Non-excluded words pushed to findings array
|
4
4
|
class Exclude
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
1
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(values)
|
12
|
-
@regexps = Searcher.new(values).regexps
|
13
|
-
@findings = Findings.new
|
14
|
-
end
|
5
|
+
include ConditionalOption
|
15
6
|
|
16
7
|
# base.text is the raw tokens split on ' '
|
17
8
|
# values are Regexps array to search
|
18
9
|
# base.findings, Findings object for pushing matching tokens
|
19
10
|
def tokenize(base)
|
20
|
-
|
21
|
-
|
22
|
-
findings.push(i, t) unless t.match(r)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
findings.result
|
11
|
+
@base = base
|
12
|
+
tokenize_if { |word, regexp| !word.match(regexp) }
|
26
13
|
end
|
27
14
|
end
|
28
15
|
end
|
@@ -2,31 +2,17 @@ module Textoken
|
|
2
2
|
# This option object picks words in text with less than length
|
3
3
|
# of the option value
|
4
4
|
class LessThan
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
2
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(value)
|
12
|
-
check_value(value)
|
13
|
-
@number = value
|
14
|
-
@findings = Findings.new
|
15
|
-
end
|
5
|
+
include NumericOption
|
16
6
|
|
17
7
|
def tokenize(base)
|
18
|
-
base
|
19
|
-
|
20
|
-
end
|
21
|
-
findings.result
|
8
|
+
@base = base
|
9
|
+
tokenize_if { |word| word.length < number }
|
22
10
|
end
|
23
11
|
|
24
12
|
private
|
25
13
|
|
26
|
-
def check_value(value)
|
27
|
-
|
28
|
-
Textoken.type_err "value #{value} is not permitted for
|
29
|
-
less_than option it has to be 2 at least."
|
14
|
+
def validate_option_value(value)
|
15
|
+
validate { value.class == Fixnum && value > 1 }
|
30
16
|
end
|
31
17
|
end
|
32
18
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Textoken
|
2
|
+
# This module is shared by options such as only and exclude
|
3
|
+
module ConditionalOption
|
4
|
+
attr_reader :regexps, :findings, :base
|
5
|
+
|
6
|
+
def priority
|
7
|
+
1
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(values)
|
11
|
+
@regexps = Searcher.new(values).regexps
|
12
|
+
@findings = Findings.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def tokenize_if(&block)
|
16
|
+
regexps.each do |r|
|
17
|
+
base.text.each_with_index do |w, i|
|
18
|
+
findings.push(i, w) if block.call(w, r)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
findings.result
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Textoken
|
2
|
+
# This module is shared by options such as more_than and less_than
|
3
|
+
module NumericOption
|
4
|
+
attr_reader :number, :findings, :base
|
5
|
+
|
6
|
+
def priority
|
7
|
+
2
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(value)
|
11
|
+
validate_option_value(value)
|
12
|
+
@number = value
|
13
|
+
@findings = Findings.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def tokenize_if(&code)
|
17
|
+
base.text.each_with_index do |w, i|
|
18
|
+
findings.push(i, w) if code.call(w)
|
19
|
+
end
|
20
|
+
findings.result
|
21
|
+
end
|
22
|
+
|
23
|
+
def validate(&code)
|
24
|
+
return if code.call
|
25
|
+
Textoken.expression_err "value #{number} is not permitted for
|
26
|
+
#{self.class.name} option."
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -2,31 +2,17 @@ module Textoken
|
|
2
2
|
# This option object picks words in text with more than length
|
3
3
|
# of the option value
|
4
4
|
class MoreThan
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
2
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(value)
|
12
|
-
check_value(value)
|
13
|
-
@number = value
|
14
|
-
@findings = Findings.new
|
15
|
-
end
|
5
|
+
include NumericOption
|
16
6
|
|
17
7
|
def tokenize(base)
|
18
|
-
base
|
19
|
-
|
20
|
-
end
|
21
|
-
findings.result
|
8
|
+
@base = base
|
9
|
+
tokenize_if { |word| word.length > number }
|
22
10
|
end
|
23
11
|
|
24
12
|
private
|
25
13
|
|
26
|
-
def check_value(value)
|
27
|
-
|
28
|
-
Textoken.type_err "value #{value} is not permitted for
|
29
|
-
more_than option it has to be 0 at least."
|
14
|
+
def validate_option_value(value)
|
15
|
+
validate { value.class == Fixnum && value >= 0 }
|
30
16
|
end
|
31
17
|
end
|
32
18
|
end
|
@@ -2,27 +2,14 @@ module Textoken
|
|
2
2
|
# This option object selects words in text via matching regexp
|
3
3
|
# regexp should be defined in option_values.yml
|
4
4
|
class Only
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
1
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(values)
|
12
|
-
@regexps = Searcher.new(values).regexps
|
13
|
-
@findings = Findings.new
|
14
|
-
end
|
5
|
+
include ConditionalOption
|
15
6
|
|
16
7
|
# base.text is the raw tokens split on ' '
|
17
8
|
# values are Regexps array to search
|
18
9
|
# base.findings, Findings object for pushing matching tokens
|
19
10
|
def tokenize(base)
|
20
|
-
|
21
|
-
|
22
|
-
findings.push(i, t) if t.match(r)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
findings.result
|
11
|
+
@base = base
|
12
|
+
tokenize_if { |word, regexp| word.match(regexp) }
|
26
13
|
end
|
27
14
|
end
|
28
15
|
end
|
data/lib/textoken/version.rb
CHANGED
data/lib/textoken.rb
CHANGED
@@ -9,10 +9,15 @@ require 'textoken/findings'
|
|
9
9
|
require 'textoken/tokenizer'
|
10
10
|
require 'textoken/scanner'
|
11
11
|
|
12
|
+
require 'textoken/options/modules/numeric_option'
|
13
|
+
require 'textoken/options/modules/conditional_option'
|
14
|
+
require 'textoken/options/modules/regexp_option'
|
12
15
|
require 'textoken/options/less_than'
|
13
16
|
require 'textoken/options/more_than'
|
14
17
|
require 'textoken/options/only'
|
15
18
|
require 'textoken/options/exclude'
|
19
|
+
require 'textoken/options/only_regexp'
|
20
|
+
require 'textoken/options/exclude_regexp'
|
16
21
|
|
17
22
|
require 'textoken/factories/option_factory'
|
18
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textoken
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Cetin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -75,9 +75,14 @@ files:
|
|
75
75
|
- lib/textoken/findings.rb
|
76
76
|
- lib/textoken/options.rb
|
77
77
|
- lib/textoken/options/exclude.rb
|
78
|
+
- lib/textoken/options/exclude_regexp.rb
|
78
79
|
- lib/textoken/options/less_than.rb
|
80
|
+
- lib/textoken/options/modules/conditional_option.rb
|
81
|
+
- lib/textoken/options/modules/numeric_option.rb
|
82
|
+
- lib/textoken/options/modules/regexp_option.rb
|
79
83
|
- lib/textoken/options/more_than.rb
|
80
84
|
- lib/textoken/options/only.rb
|
85
|
+
- lib/textoken/options/only_regexp.rb
|
81
86
|
- lib/textoken/regexps/option_values.yml
|
82
87
|
- lib/textoken/scanner.rb
|
83
88
|
- lib/textoken/searcher.rb
|