term-extract 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +2 -2
- data/VERSION +1 -1
- data/lib/term-extract.rb +2 -2
- data/term-extract.gemspec +5 -5
- data/test/test_term-extract.rb +18 -0
- metadata +9 -7
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,7 +7,7 @@ GEM
|
|
7
7
|
git (>= 1.2.5)
|
8
8
|
rake
|
9
9
|
rake (0.8.7)
|
10
|
-
rbtagger (0.4.
|
10
|
+
rbtagger (0.4.7)
|
11
11
|
rcov (0.9.9)
|
12
12
|
shoulda (2.11.3)
|
13
13
|
|
@@ -17,6 +17,6 @@ PLATFORMS
|
|
17
17
|
DEPENDENCIES
|
18
18
|
bundler (~> 1.0.0)
|
19
19
|
jeweler (~> 1.5.2)
|
20
|
-
rbtagger
|
20
|
+
rbtagger (~> 0.4.7)
|
21
21
|
rcov
|
22
22
|
shoulda
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.1
|
1
|
+
0.5.2
|
data/lib/term-extract.rb
CHANGED
@@ -107,7 +107,7 @@ class TermExtract
|
|
107
107
|
if @collapse_terms
|
108
108
|
terms.each_key do |term1|
|
109
109
|
terms.each_key do |term2|
|
110
|
-
terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{term2}$/ || term1 =~ /^#{term2}[^A-Za-z0-9]/)
|
110
|
+
terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{Regexp.escape(term2)}$/ || term1 =~ /^#{Regexp.escape(term2)}[^A-Za-z0-9]/)
|
111
111
|
end
|
112
112
|
end
|
113
113
|
end
|
@@ -119,7 +119,7 @@ class TermExtract
|
|
119
119
|
terms
|
120
120
|
end
|
121
121
|
|
122
|
-
protected
|
122
|
+
protected
|
123
123
|
def preprocess_tags(pos)
|
124
124
|
# Add in full stops to tag list to allow multiterms to work
|
125
125
|
tags = []
|
data/term-extract.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{term-extract}
|
8
|
-
s.version = "0.5.1"
|
8
|
+
s.version = "0.5.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["rattle"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-03}
|
13
13
|
s.default_executable = %q{term-extract}
|
14
14
|
s.email = %q{robl@rjlee.net}
|
15
15
|
s.executables = ["term-extract"]
|
@@ -46,20 +46,20 @@ Gem::Specification.new do |s|
|
|
46
46
|
s.specification_version = 3
|
47
47
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
-
s.add_runtime_dependency(%q<rbtagger>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
50
50
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
51
51
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
52
52
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
53
53
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
54
54
|
else
|
55
|
-
s.add_dependency(%q<rbtagger>, [">= 0"])
|
55
|
+
s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
56
56
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
57
57
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
58
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
59
59
|
s.add_dependency(%q<rcov>, [">= 0"])
|
60
60
|
end
|
61
61
|
else
|
62
|
-
s.add_dependency(%q<rbtagger>, [">= 0"])
|
62
|
+
s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
63
63
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
64
64
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
65
65
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
data/test/test_term-extract.rb
CHANGED
@@ -175,5 +175,23 @@ SOURCE
|
|
175
175
|
end
|
176
176
|
|
177
177
|
end
|
178
|
+
|
179
|
+
context 'when having regex characters in terms' do
|
180
|
+
should 'not break when * is involved' do
|
181
|
+
doc = 'Siam Square Soi 4, Rama 1 Rd, Pathum Wan, Bangkok, 10330 *Bangkok Trip'
|
182
|
+
assert_nothing_raised do
|
183
|
+
TermExtract.extract(doc)
|
184
|
+
end
|
185
|
+
end
|
178
186
|
|
187
|
+
should 'not break when ? is involved' do
|
188
|
+
doc = <<EOF
|
189
|
+
We sat and watched the very accommodating waitresses tend to a healthy traffic of middle-aged male Japanese patrons and wondered if we had somehow stumbled unwittingly into KL's version of a kyabakura.
|
190
|
+
Nonbei is celebrating its anniversary this Wednesday, 25th November 2009 by offering a RM110++ deal for all-you-can-eat (drinks up till 10PM).
|
191
|
+
EOF
|
192
|
+
assert_nothing_raised do
|
193
|
+
TermExtract.extract(doc)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
179
197
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: term-extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 1
|
10
|
-
version: 0.5.1
|
9
|
+
- 2
|
10
|
+
version: 0.5.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- rattle
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-03 00:00:00 +01:00
|
19
19
|
default_executable: term-extract
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -24,12 +24,14 @@ dependencies:
|
|
24
24
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 1
|
30
30
|
segments:
|
31
31
|
- 0
|
32
|
-
|
32
|
+
- 4
|
33
|
+
- 7
|
34
|
+
version: 0.4.7
|
33
35
|
requirement: *id001
|
34
36
|
type: :runtime
|
35
37
|
- !ruby/object:Gem::Dependency
|