term-extract 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/Gemfile.lock +2 -2
- data/VERSION +1 -1
- data/lib/term-extract.rb +2 -2
- data/term-extract.gemspec +5 -5
- data/test/test_term-extract.rb +18 -0
- metadata +9 -7
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,7 +7,7 @@ GEM
|
|
7
7
|
git (>= 1.2.5)
|
8
8
|
rake
|
9
9
|
rake (0.8.7)
|
10
|
-
rbtagger (0.4.
|
10
|
+
rbtagger (0.4.7)
|
11
11
|
rcov (0.9.9)
|
12
12
|
shoulda (2.11.3)
|
13
13
|
|
@@ -17,6 +17,6 @@ PLATFORMS
|
|
17
17
|
DEPENDENCIES
|
18
18
|
bundler (~> 1.0.0)
|
19
19
|
jeweler (~> 1.5.2)
|
20
|
-
rbtagger
|
20
|
+
rbtagger (~> 0.4.7)
|
21
21
|
rcov
|
22
22
|
shoulda
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.1
|
1
|
+
0.5.2
|
data/lib/term-extract.rb
CHANGED
@@ -107,7 +107,7 @@ class TermExtract
|
|
107
107
|
if @collapse_terms
|
108
108
|
terms.each_key do |term1|
|
109
109
|
terms.each_key do |term2|
|
110
|
-
terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{term2}$/ || term1 =~ /^#{term2}[^A-Za-z0-9]/)
|
110
|
+
terms.delete(term2) if term1.length > term2.length && (term1 =~ /[^A-Za-z0-9]#{Regexp.escape(term2)}$/ || term1 =~ /^#{Regexp.escape(term2)}[^A-Za-z0-9]/)
|
111
111
|
end
|
112
112
|
end
|
113
113
|
end
|
@@ -119,7 +119,7 @@ class TermExtract
|
|
119
119
|
terms
|
120
120
|
end
|
121
121
|
|
122
|
-
protected
|
122
|
+
protected
|
123
123
|
def preprocess_tags(pos)
|
124
124
|
# Add in full stops to tag list to allow multiterms to work
|
125
125
|
tags = []
|
data/term-extract.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{term-extract}
|
8
|
-
s.version = "0.5.1"
|
8
|
+
s.version = "0.5.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["rattle"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-03}
|
13
13
|
s.default_executable = %q{term-extract}
|
14
14
|
s.email = %q{robl@rjlee.net}
|
15
15
|
s.executables = ["term-extract"]
|
@@ -46,20 +46,20 @@ Gem::Specification.new do |s|
|
|
46
46
|
s.specification_version = 3
|
47
47
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
-
s.add_runtime_dependency(%q<rbtagger>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
50
50
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
51
51
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
52
52
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
53
53
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
54
54
|
else
|
55
|
-
s.add_dependency(%q<rbtagger>, [">= 0"])
|
55
|
+
s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
56
56
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
57
57
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
58
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
59
59
|
s.add_dependency(%q<rcov>, [">= 0"])
|
60
60
|
end
|
61
61
|
else
|
62
|
-
s.add_dependency(%q<rbtagger>, [">= 0"])
|
62
|
+
s.add_dependency(%q<rbtagger>, ["~> 0.4.7"])
|
63
63
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
64
64
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
65
65
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
data/test/test_term-extract.rb
CHANGED
@@ -175,5 +175,23 @@ SOURCE
|
|
175
175
|
end
|
176
176
|
|
177
177
|
end
|
178
|
+
|
179
|
+
context 'when having regex characters in terms' do
|
180
|
+
should 'not break when * is involved' do
|
181
|
+
doc = 'Siam Square Soi 4, Rama 1 Rd, Pathum Wan, Bangkok, 10330 *Bangkok Trip'
|
182
|
+
assert_nothing_raised do
|
183
|
+
TermExtract.extract(doc)
|
184
|
+
end
|
185
|
+
end
|
178
186
|
|
187
|
+
should 'not break when ? is involved' do
|
188
|
+
doc = <<EOF
|
189
|
+
We sat and watched the very accommodating waitresses tend to a healthy traffic of middle-aged male Japanese patrons and wondered if we had somehow stumbled unwittingly into KL's version of a kyabakura.
|
190
|
+
Nonbei is celebrating its anniversary this Wednesday, 25th November 2009 by offering a RM110++ deal for all-you-can-eat (drinks up till 10PM).
|
191
|
+
EOF
|
192
|
+
assert_nothing_raised do
|
193
|
+
TermExtract.extract(doc)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
179
197
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: term-extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 1
|
10
|
-
version: 0.5.1
|
9
|
+
- 2
|
10
|
+
version: 0.5.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- rattle
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-03 00:00:00 +01:00
|
19
19
|
default_executable: term-extract
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -24,12 +24,14 @@ dependencies:
|
|
24
24
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 1
|
30
30
|
segments:
|
31
31
|
- 0
|
32
|
-
|
32
|
+
- 4
|
33
|
+
- 7
|
34
|
+
version: 0.4.7
|
33
35
|
requirement: *id001
|
34
36
|
type: :runtime
|
35
37
|
- !ruby/object:Gem::Dependency
|