textoken 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80a781929cd70c8b3dc465e584c52e03a0a741ae
4
- data.tar.gz: dddb77f4f78bef3e14fb337ffddc8552791e5375
3
+ metadata.gz: e530e61af20068c0e361856680b8df0ce9ace2e9
4
+ data.tar.gz: a94c443c5db50d0270916e8966c212e0321e8579
5
5
  SHA512:
6
- metadata.gz: a710083d4674593e17d811413d3439785a206d8c38ef69c6b4b30ea03db1d63e03a97590b3bd00e2f07181c748f0f4592c1ec9d705365f84d7b8665f3f46b571
7
- data.tar.gz: 9c93000700578a98cf0348afe3356622b8e740d7f97511e55710d3cf71b3a4e642628969f1391ded74f64714d2717d9492d2a493ae6568fc7f13748ff2002ea5
6
+ metadata.gz: 923e3261959c09b20f497f6b5d24a0495220265a23e23cd583f55db7d4eba65f5fe441374d4d11d56fc4a3e5856bb5baeda83c3c0f4994b522659121e207bd4e
7
+ data.tar.gz: 6ccac85435c7192ee2c83bfbc49d2efcff4e8fb695a4e78af5e35d7083ccc4ac4d13930ab9766d906fe41b98f4a50552a9a2aad62fe76a8f66e7cd925bfb6603
@@ -4,11 +4,12 @@ module Textoken
4
4
  class Exclude
5
5
  include ConditionalOption
6
6
 
7
+ private
8
+
7
9
  # base.text is raw tokens splitted with ' '
8
10
  # values are Regexps array to search
9
11
  # base.findings, Findings object for pushing matching tokens
10
- def tokenize(base)
11
- @base = base
12
+ def tokenize_condition
12
13
  tokenize_if { |word, regexp| !word.match(regexp) }
13
14
  end
14
15
  end
@@ -4,15 +4,14 @@ module Textoken
4
4
  class LessThan
5
5
  include NumericOption
6
6
 
7
- def tokenize(base)
8
- @base = base
7
+ private
8
+
9
+ def tokenize_condition
9
10
  tokenize_if { |word| word.length < number }
10
11
  end
11
12
 
12
- private
13
-
14
- def validate_option_value(value)
15
- validate { value.class == Fixnum && value > 1 }
13
+ def validate_option_value
14
+ validate { |value| value > 1 }
16
15
  end
17
16
  end
18
17
  end
@@ -1,6 +1,8 @@
1
1
  module Textoken
2
2
  # This module will be shared in options like, only and exclude
3
3
  module ConditionalOption
4
+ include TokenizableOption
5
+
4
6
  attr_reader :regexps, :findings, :base
5
7
 
6
8
  def priority
@@ -12,6 +14,8 @@ module Textoken
12
14
  @findings = Findings.new
13
15
  end
14
16
 
17
+ private
18
+
15
19
  def tokenize_if(&block)
16
20
  regexps.each do |r|
17
21
  base.text.each_with_index do |w, i|
@@ -1,18 +1,22 @@
1
1
  module Textoken
2
2
  # This module will be shared in options like, more_than and less_than
3
3
  module NumericOption
4
- attr_reader :number, :findings, :base
4
+ include TokenizableOption
5
+
6
+ attr_reader :number, :findings
5
7
 
6
8
  def priority
7
9
  2
8
10
  end
9
11
 
10
12
  def initialize(value)
11
- validate_option_value(value)
12
13
  @number = value
13
14
  @findings = Findings.new
15
+ validate_option_value
14
16
  end
15
17
 
18
+ private
19
+
16
20
  def tokenize_if(&code)
17
21
  base.text.each_with_index do |w, i|
18
22
  findings.push(i, w) if code.call(w)
@@ -21,7 +25,7 @@ module Textoken
21
25
  end
22
26
 
23
27
  def validate(&code)
24
- return if code.call
28
+ return if number.class == Fixnum && code.call(number)
25
29
  Textoken.expression_err "value #{number} is not permitted for
26
30
  #{self.class.name} option."
27
31
  end
@@ -0,0 +1,18 @@
1
+ module Textoken
2
+ # This module will be shared in options like, only_regexp and exclude_regexp
3
+ module TokenizableOption
4
+ attr_reader :base
5
+
6
+ def tokenize(base)
7
+ @base = base
8
+ tokenize_condition
9
+ end
10
+
11
+ private
12
+
13
+ def tokenize_condition
14
+ Textoken.type_err('tokenize_condition method has to be implemented
15
+ for Options.')
16
+ end
17
+ end
18
+ end
@@ -4,15 +4,14 @@ module Textoken
4
4
  class MoreThan
5
5
  include NumericOption
6
6
 
7
- def tokenize(base)
8
- @base = base
7
+ private
8
+
9
+ def tokenize_condition
9
10
  tokenize_if { |word| word.length > number }
10
11
  end
11
12
 
12
- private
13
-
14
- def validate_option_value(value)
15
- validate { value.class == Fixnum && value >= 0 }
13
+ def validate_option_value
14
+ validate { |value| value >= 0 }
16
15
  end
17
16
  end
18
17
  end
@@ -4,11 +4,12 @@ module Textoken
4
4
  class Only
5
5
  include ConditionalOption
6
6
 
7
+ private
8
+
7
9
  # base.text is raw tokens splitted with ' '
8
10
  # values are Regexps array to search
9
11
  # base.findings, Findings object for pushing matching tokens
10
- def tokenize(base)
11
- @base = base
12
+ def tokenize_condition
12
13
  tokenize_if { |word, regexp| word.match(regexp) }
13
14
  end
14
15
  end
@@ -1,3 +1,3 @@
1
1
  module Textoken
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
data/lib/textoken.rb CHANGED
@@ -9,6 +9,7 @@ require 'textoken/findings'
9
9
  require 'textoken/tokenizer'
10
10
  require 'textoken/scanner'
11
11
 
12
+ require 'textoken/options/modules/tokenizable_option'
12
13
  require 'textoken/options/modules/numeric_option'
13
14
  require 'textoken/options/modules/conditional_option'
14
15
  require 'textoken/options/modules/regexp_option'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textoken
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Cetin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-06 00:00:00.000000000 Z
11
+ date: 2015-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -44,23 +44,9 @@ dependencies:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '10.0'
47
- - !ruby/object:Gem::Dependency
48
- name: pry
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :development
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0'
61
- description: "Textoken is a Ruby library for text tokenization. \n This gem extracts
62
- words from text with many customizations. \n It can be used in many fields like
63
- crawling and Natural Language Processing."
47
+ description: Textoken is a Ruby library for text tokenization. This gem extracts words
48
+ from text with many customizations. It can be used in many fields like Web Crawling
49
+ and Natural Language Processing.
64
50
  email:
65
51
  - mcetin.cm@gmail.com
66
52
  executables: []
@@ -80,6 +66,7 @@ files:
80
66
  - lib/textoken/options/modules/conditional_option.rb
81
67
  - lib/textoken/options/modules/numeric_option.rb
82
68
  - lib/textoken/options/modules/regexp_option.rb
69
+ - lib/textoken/options/modules/tokenizable_option.rb
83
70
  - lib/textoken/options/more_than.rb
84
71
  - lib/textoken/options/only.rb
85
72
  - lib/textoken/options/only_regexp.rb
@@ -108,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
95
  version: '0'
109
96
  requirements: []
110
97
  rubyforge_project:
111
- rubygems_version: 2.4.5.1
98
+ rubygems_version: 2.4.8
112
99
  signing_key:
113
100
  specification_version: 4
114
101
  summary: Simple and customizable text tokenization gem.