term-extract 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -3,7 +3,7 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
 
6
- gem 'rbtagger', ">=0"
6
+ gem 'rbtagger', "~>0.4.7"
7
7
 
8
8
  # Add dependencies to develop your gem here.
9
9
  # Include everything needed to run rake, tests, features, etc.
@@ -7,7 +7,7 @@ GEM
7
7
  git (>= 1.2.5)
8
8
  rake
9
9
  rake (0.8.7)
10
- rbtagger (0.4.6)
10
+ rbtagger (0.4.7)
11
11
  rcov (0.9.9)
12
12
  shoulda (2.11.3)
13
13
 
@@ -17,6 +17,6 @@ PLATFORMS
17
17
  DEPENDENCIES
18
18
  bundler (~> 1.0.0)
19
19
  jeweler (~> 1.5.2)
20
- rbtagger
20
+ rbtagger (~> 0.4.7)
21
21
  rcov
22
22
  shoulda
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.1
1
+ 0.5.2
@@ -107,7 +107,7 @@ class TermExtract
107
107
  if @collapse_terms
108
108
  terms.each_key do |term1|
109
109
  terms.each_key do |term2|
110
- terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{term2}$/ || term1 =~ /^#{term2}[^A-Za-z0-9]/)
110
+ terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{Regexp.escape(term2)}$/ || term1 =~ /^#{Regexp.escape(term2)}[^A-Za-z0-9]/)
111
111
  end
112
112
  end
113
113
  end
@@ -119,7 +119,7 @@ class TermExtract
119
119
  terms
120
120
  end
121
121
 
122
- protected
122
+ protected
123
123
  def preprocess_tags(pos)
124
124
  # Add in full stops to tag list to allow multiterms to work
125
125
  tags = []
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{term-extract}
8
- s.version = "0.5.1"
8
+ s.version = "0.5.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["rattle"]
12
- s.date = %q{2011-01-07}
12
+ s.date = %q{2011-06-03}
13
13
  s.default_executable = %q{term-extract}
14
14
  s.email = %q{robl@rjlee.net}
15
15
  s.executables = ["term-extract"]
@@ -46,20 +46,20 @@ Gem::Specification.new do |s|
46
46
  s.specification_version = 3
47
47
 
48
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
- s.add_runtime_dependency(%q<rbtagger>, [">= 0"])
49
+ s.add_runtime_dependency(%q<rbtagger>, ["~> 0.4.7"])
50
50
  s.add_development_dependency(%q<shoulda>, [">= 0"])
51
51
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
52
  s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
53
53
  s.add_development_dependency(%q<rcov>, [">= 0"])
54
54
  else
55
- s.add_dependency(%q<rbtagger>, [">= 0"])
55
+ s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
56
56
  s.add_dependency(%q<shoulda>, [">= 0"])
57
57
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
58
  s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
59
59
  s.add_dependency(%q<rcov>, [">= 0"])
60
60
  end
61
61
  else
62
- s.add_dependency(%q<rbtagger>, [">= 0"])
62
+ s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
63
63
  s.add_dependency(%q<shoulda>, [">= 0"])
64
64
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
65
65
  s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
@@ -175,5 +175,23 @@ SOURCE
175
175
  end
176
176
 
177
177
  end
178
+
179
+ context 'when having regex characters in terms' do
180
+ should 'not break when * is involved' do
181
+ doc = 'Siam Square Soi 4, Rama 1 Rd, Pathum Wan, Bangkok, 10330 *Bangkok Trip'
182
+ assert_nothing_raised do
183
+ TermExtract.extract(doc)
184
+ end
185
+ end
178
186
 
187
+ should 'not break when ? is involved' do
188
+ doc = <<EOF
189
+ We sat and watched the very accommodating waitresses tend to a healthy traffic of middle-aged male Japanese patrons and wondered if we had somehow stumbled unwittingly into KL's version of a kyabakura.
190
+ Nonbei is celebrating its anniversary this Wednesday, 25th November 2009 by offering a RM110++ deal for all-you-can-eat (drinks up till 10PM).
191
+ EOF
192
+ assert_nothing_raised do
193
+ TermExtract.extract(doc)
194
+ end
195
+ end
196
+ end
179
197
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: term-extract
3
3
  version: !ruby/object:Gem::Version
4
- hash: 9
4
+ hash: 15
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 1
10
- version: 0.5.1
9
+ - 2
10
+ version: 0.5.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - rattle
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-07 00:00:00 +00:00
18
+ date: 2011-06-03 00:00:00 +01:00
19
19
  default_executable: term-extract
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -24,12 +24,14 @@ dependencies:
24
24
  version_requirements: &id001 !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
- - - ">="
27
+ - - ~>
28
28
  - !ruby/object:Gem::Version
29
- hash: 3
29
+ hash: 1
30
30
  segments:
31
31
  - 0
32
- version: "0"
32
+ - 4
33
+ - 7
34
+ version: 0.4.7
33
35
  requirement: *id001
34
36
  type: :runtime
35
37
  - !ruby/object:Gem::Dependency