textoken 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/textoken/options/exclude.rb +3 -16
- data/lib/textoken/options/exclude_regexp.rb +7 -0
- data/lib/textoken/options/less_than.rb +5 -19
- data/lib/textoken/options/modules/conditional_option.rb +24 -0
- data/lib/textoken/options/modules/numeric_option.rb +29 -0
- data/lib/textoken/options/modules/regexp_option.rb +11 -0
- data/lib/textoken/options/more_than.rb +5 -19
- data/lib/textoken/options/only.rb +3 -16
- data/lib/textoken/options/only_regexp.rb +7 -0
- data/lib/textoken/version.rb +1 -1
- data/lib/textoken.rb +5 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80a781929cd70c8b3dc465e584c52e03a0a741ae
|
4
|
+
data.tar.gz: dddb77f4f78bef3e14fb337ffddc8552791e5375
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a710083d4674593e17d811413d3439785a206d8c38ef69c6b4b30ea03db1d63e03a97590b3bd00e2f07181c748f0f4592c1ec9d705365f84d7b8665f3f46b571
|
7
|
+
data.tar.gz: 9c93000700578a98cf0348afe3356622b8e740d7f97511e55710d3cf71b3a4e642628969f1391ded74f64714d2717d9492d2a493ae6568fc7f13748ff2002ea5
|
@@ -2,27 +2,14 @@ module Textoken
|
|
2
2
|
# This option object excludes words in text via matching regexp
|
3
3
|
# Non-excluded words pushed to findings array
|
4
4
|
class Exclude
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
1
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(values)
|
12
|
-
@regexps = Searcher.new(values).regexps
|
13
|
-
@findings = Findings.new
|
14
|
-
end
|
5
|
+
include ConditionalOption
|
15
6
|
|
16
7
|
# base.text is raw tokens splitted with ' '
|
17
8
|
# values are Regexps array to search
|
18
9
|
# base.findings, Findings object for pushing matching tokens
|
19
10
|
def tokenize(base)
|
20
|
-
|
21
|
-
|
22
|
-
findings.push(i, t) unless t.match(r)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
findings.result
|
11
|
+
@base = base
|
12
|
+
tokenize_if { |word, regexp| !word.match(regexp) }
|
26
13
|
end
|
27
14
|
end
|
28
15
|
end
|
@@ -2,31 +2,17 @@ module Textoken
|
|
2
2
|
# This option object picks words in text with less than length
|
3
3
|
# of the option value
|
4
4
|
class LessThan
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
2
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(value)
|
12
|
-
check_value(value)
|
13
|
-
@number = value
|
14
|
-
@findings = Findings.new
|
15
|
-
end
|
5
|
+
include NumericOption
|
16
6
|
|
17
7
|
def tokenize(base)
|
18
|
-
base
|
19
|
-
|
20
|
-
end
|
21
|
-
findings.result
|
8
|
+
@base = base
|
9
|
+
tokenize_if { |word| word.length < number }
|
22
10
|
end
|
23
11
|
|
24
12
|
private
|
25
13
|
|
26
|
-
def
|
27
|
-
|
28
|
-
Textoken.type_err "value #{value} is not permitted for
|
29
|
-
less_than option it has to be 2 at least."
|
14
|
+
def validate_option_value(value)
|
15
|
+
validate { value.class == Fixnum && value > 1 }
|
30
16
|
end
|
31
17
|
end
|
32
18
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Textoken
|
2
|
+
# This module will be shared in options like, only and exclude
|
3
|
+
module ConditionalOption
|
4
|
+
attr_reader :regexps, :findings, :base
|
5
|
+
|
6
|
+
def priority
|
7
|
+
1
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(values)
|
11
|
+
@regexps = Searcher.new(values).regexps
|
12
|
+
@findings = Findings.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def tokenize_if(&block)
|
16
|
+
regexps.each do |r|
|
17
|
+
base.text.each_with_index do |w, i|
|
18
|
+
findings.push(i, w) if block.call(w, r)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
findings.result
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Textoken
|
2
|
+
# This module will be shared in options like, more_than and less_than
|
3
|
+
module NumericOption
|
4
|
+
attr_reader :number, :findings, :base
|
5
|
+
|
6
|
+
def priority
|
7
|
+
2
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize(value)
|
11
|
+
validate_option_value(value)
|
12
|
+
@number = value
|
13
|
+
@findings = Findings.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def tokenize_if(&code)
|
17
|
+
base.text.each_with_index do |w, i|
|
18
|
+
findings.push(i, w) if code.call(w)
|
19
|
+
end
|
20
|
+
findings.result
|
21
|
+
end
|
22
|
+
|
23
|
+
def validate(&code)
|
24
|
+
return if code.call
|
25
|
+
Textoken.expression_err "value #{number} is not permitted for
|
26
|
+
#{self.class.name} option."
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -2,31 +2,17 @@ module Textoken
|
|
2
2
|
# This option object picks words in text with more than length
|
3
3
|
# of the option value
|
4
4
|
class MoreThan
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
2
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(value)
|
12
|
-
check_value(value)
|
13
|
-
@number = value
|
14
|
-
@findings = Findings.new
|
15
|
-
end
|
5
|
+
include NumericOption
|
16
6
|
|
17
7
|
def tokenize(base)
|
18
|
-
base
|
19
|
-
|
20
|
-
end
|
21
|
-
findings.result
|
8
|
+
@base = base
|
9
|
+
tokenize_if { |word| word.length > number }
|
22
10
|
end
|
23
11
|
|
24
12
|
private
|
25
13
|
|
26
|
-
def
|
27
|
-
|
28
|
-
Textoken.type_err "value #{value} is not permitted for
|
29
|
-
more_than option it has to be 0 at least."
|
14
|
+
def validate_option_value(value)
|
15
|
+
validate { value.class == Fixnum && value >= 0 }
|
30
16
|
end
|
31
17
|
end
|
32
18
|
end
|
@@ -2,27 +2,14 @@ module Textoken
|
|
2
2
|
# This option object selects words in text via matching regexp
|
3
3
|
# regexp should be defined in option_values.yml
|
4
4
|
class Only
|
5
|
-
|
6
|
-
|
7
|
-
def priority
|
8
|
-
1
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(values)
|
12
|
-
@regexps = Searcher.new(values).regexps
|
13
|
-
@findings = Findings.new
|
14
|
-
end
|
5
|
+
include ConditionalOption
|
15
6
|
|
16
7
|
# base.text is raw tokens splitted with ' '
|
17
8
|
# values are Regexps array to search
|
18
9
|
# base.findings, Findings object for pushing matching tokens
|
19
10
|
def tokenize(base)
|
20
|
-
|
21
|
-
|
22
|
-
findings.push(i, t) if t.match(r)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
findings.result
|
11
|
+
@base = base
|
12
|
+
tokenize_if { |word, regexp| word.match(regexp) }
|
26
13
|
end
|
27
14
|
end
|
28
15
|
end
|
data/lib/textoken/version.rb
CHANGED
data/lib/textoken.rb
CHANGED
@@ -9,10 +9,15 @@ require 'textoken/findings'
|
|
9
9
|
require 'textoken/tokenizer'
|
10
10
|
require 'textoken/scanner'
|
11
11
|
|
12
|
+
require 'textoken/options/modules/numeric_option'
|
13
|
+
require 'textoken/options/modules/conditional_option'
|
14
|
+
require 'textoken/options/modules/regexp_option'
|
12
15
|
require 'textoken/options/less_than'
|
13
16
|
require 'textoken/options/more_than'
|
14
17
|
require 'textoken/options/only'
|
15
18
|
require 'textoken/options/exclude'
|
19
|
+
require 'textoken/options/only_regexp'
|
20
|
+
require 'textoken/options/exclude_regexp'
|
16
21
|
|
17
22
|
require 'textoken/factories/option_factory'
|
18
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textoken
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Cetin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -75,9 +75,14 @@ files:
|
|
75
75
|
- lib/textoken/findings.rb
|
76
76
|
- lib/textoken/options.rb
|
77
77
|
- lib/textoken/options/exclude.rb
|
78
|
+
- lib/textoken/options/exclude_regexp.rb
|
78
79
|
- lib/textoken/options/less_than.rb
|
80
|
+
- lib/textoken/options/modules/conditional_option.rb
|
81
|
+
- lib/textoken/options/modules/numeric_option.rb
|
82
|
+
- lib/textoken/options/modules/regexp_option.rb
|
79
83
|
- lib/textoken/options/more_than.rb
|
80
84
|
- lib/textoken/options/only.rb
|
85
|
+
- lib/textoken/options/only_regexp.rb
|
81
86
|
- lib/textoken/regexps/option_values.yml
|
82
87
|
- lib/textoken/scanner.rb
|
83
88
|
- lib/textoken/searcher.rb
|