textoken 1.1.0 → 1.1.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80a781929cd70c8b3dc465e584c52e03a0a741ae
4
- data.tar.gz: dddb77f4f78bef3e14fb337ffddc8552791e5375
3
+ metadata.gz: e530e61af20068c0e361856680b8df0ce9ace2e9
4
+ data.tar.gz: a94c443c5db50d0270916e8966c212e0321e8579
5
5
  SHA512:
6
- metadata.gz: a710083d4674593e17d811413d3439785a206d8c38ef69c6b4b30ea03db1d63e03a97590b3bd00e2f07181c748f0f4592c1ec9d705365f84d7b8665f3f46b571
7
- data.tar.gz: 9c93000700578a98cf0348afe3356622b8e740d7f97511e55710d3cf71b3a4e642628969f1391ded74f64714d2717d9492d2a493ae6568fc7f13748ff2002ea5
6
+ metadata.gz: 923e3261959c09b20f497f6b5d24a0495220265a23e23cd583f55db7d4eba65f5fe441374d4d11d56fc4a3e5856bb5baeda83c3c0f4994b522659121e207bd4e
7
+ data.tar.gz: 6ccac85435c7192ee2c83bfbc49d2efcff4e8fb695a4e78af5e35d7083ccc4ac4d13930ab9766d906fe41b98f4a50552a9a2aad62fe76a8f66e7cd925bfb6603
@@ -4,11 +4,12 @@ module Textoken
4
4
  class Exclude
5
5
  include ConditionalOption
6
6
 
7
+ private
8
+
7
9
  # base.text is raw tokens splitted with ' '
8
10
  # values are Regexps array to search
9
11
  # base.findings, Findings object for pushing matching tokens
10
- def tokenize(base)
11
- @base = base
12
+ def tokenize_condition
12
13
  tokenize_if { |word, regexp| !word.match(regexp) }
13
14
  end
14
15
  end
@@ -4,15 +4,14 @@ module Textoken
4
4
  class LessThan
5
5
  include NumericOption
6
6
 
7
- def tokenize(base)
8
- @base = base
7
+ private
8
+
9
+ def tokenize_condition
9
10
  tokenize_if { |word| word.length < number }
10
11
  end
11
12
 
12
- private
13
-
14
- def validate_option_value(value)
15
- validate { value.class == Fixnum && value > 1 }
13
+ def validate_option_value
14
+ validate { |value| value > 1 }
16
15
  end
17
16
  end
18
17
  end
@@ -1,6 +1,8 @@
1
1
  module Textoken
2
2
  # This module will be shared in options like, only and exclude
3
3
  module ConditionalOption
4
+ include TokenizableOption
5
+
4
6
  attr_reader :regexps, :findings, :base
5
7
 
6
8
  def priority
@@ -12,6 +14,8 @@ module Textoken
12
14
  @findings = Findings.new
13
15
  end
14
16
 
17
+ private
18
+
15
19
  def tokenize_if(&block)
16
20
  regexps.each do |r|
17
21
  base.text.each_with_index do |w, i|
@@ -1,18 +1,22 @@
1
1
  module Textoken
2
2
  # This module will be shared in options like, more_than and less_than
3
3
  module NumericOption
4
- attr_reader :number, :findings, :base
4
+ include TokenizableOption
5
+
6
+ attr_reader :number, :findings
5
7
 
6
8
  def priority
7
9
  2
8
10
  end
9
11
 
10
12
  def initialize(value)
11
- validate_option_value(value)
12
13
  @number = value
13
14
  @findings = Findings.new
15
+ validate_option_value
14
16
  end
15
17
 
18
+ private
19
+
16
20
  def tokenize_if(&code)
17
21
  base.text.each_with_index do |w, i|
18
22
  findings.push(i, w) if code.call(w)
@@ -21,7 +25,7 @@ module Textoken
21
25
  end
22
26
 
23
27
  def validate(&code)
24
- return if code.call
28
+ return if number.class == Fixnum && code.call(number)
25
29
  Textoken.expression_err "value #{number} is not permitted for
26
30
  #{self.class.name} option."
27
31
  end
@@ -0,0 +1,18 @@
1
+ module Textoken
2
+ # This module will be shared in options like, only_regexp and exclude_regexp
3
+ module TokenizableOption
4
+ attr_reader :base
5
+
6
+ def tokenize(base)
7
+ @base = base
8
+ tokenize_condition
9
+ end
10
+
11
+ private
12
+
13
+ def tokenize_condition
14
+ Textoken.type_err('tokenize_condition method has to be implemented
15
+ for Options.')
16
+ end
17
+ end
18
+ end
@@ -4,15 +4,14 @@ module Textoken
4
4
  class MoreThan
5
5
  include NumericOption
6
6
 
7
- def tokenize(base)
8
- @base = base
7
+ private
8
+
9
+ def tokenize_condition
9
10
  tokenize_if { |word| word.length > number }
10
11
  end
11
12
 
12
- private
13
-
14
- def validate_option_value(value)
15
- validate { value.class == Fixnum && value >= 0 }
13
+ def validate_option_value
14
+ validate { |value| value >= 0 }
16
15
  end
17
16
  end
18
17
  end
@@ -4,11 +4,12 @@ module Textoken
4
4
  class Only
5
5
  include ConditionalOption
6
6
 
7
+ private
8
+
7
9
  # base.text is raw tokens splitted with ' '
8
10
  # values are Regexps array to search
9
11
  # base.findings, Findings object for pushing matching tokens
10
- def tokenize(base)
11
- @base = base
12
+ def tokenize_condition
12
13
  tokenize_if { |word, regexp| word.match(regexp) }
13
14
  end
14
15
  end
@@ -1,3 +1,3 @@
1
1
  module Textoken
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
data/lib/textoken.rb CHANGED
@@ -9,6 +9,7 @@ require 'textoken/findings'
9
9
  require 'textoken/tokenizer'
10
10
  require 'textoken/scanner'
11
11
 
12
+ require 'textoken/options/modules/tokenizable_option'
12
13
  require 'textoken/options/modules/numeric_option'
13
14
  require 'textoken/options/modules/conditional_option'
14
15
  require 'textoken/options/modules/regexp_option'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textoken
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Cetin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-06 00:00:00.000000000 Z
11
+ date: 2015-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -44,23 +44,9 @@ dependencies:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '10.0'
47
- - !ruby/object:Gem::Dependency
48
- name: pry
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :development
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0'
61
- description: "Textoken is a Ruby library for text tokenization. \n This gem extracts
62
- words from text with many customizations. \n It can be used in many fields like
63
- crawling and Natural Language Processing."
47
+ description: Textoken is a Ruby library for text tokenization. This gem extracts words
48
+ from text with many customizations. It can be used in many fields like Web Crawling
49
+ and Natural Language Processing.
64
50
  email:
65
51
  - mcetin.cm@gmail.com
66
52
  executables: []
@@ -80,6 +66,7 @@ files:
80
66
  - lib/textoken/options/modules/conditional_option.rb
81
67
  - lib/textoken/options/modules/numeric_option.rb
82
68
  - lib/textoken/options/modules/regexp_option.rb
69
+ - lib/textoken/options/modules/tokenizable_option.rb
83
70
  - lib/textoken/options/more_than.rb
84
71
  - lib/textoken/options/only.rb
85
72
  - lib/textoken/options/only_regexp.rb
@@ -108,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
95
  version: '0'
109
96
  requirements: []
110
97
  rubyforge_project:
111
- rubygems_version: 2.4.5.1
98
+ rubygems_version: 2.4.8
112
99
  signing_key:
113
100
  specification_version: 4
114
101
  summary: Simple and customizable text tokenization gem.