rbtagger 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  have_header: checking for stdlib.h... -------------------- yes
2
2
 
3
- "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
3
+ "gcc -E -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -o conftest.i"
4
4
  checked program was:
5
5
  /* begin */
6
6
  1: #include <stdlib.h>
@@ -10,7 +10,7 @@ checked program was:
10
10
 
11
11
  have_header: checking for string.h... -------------------- yes
12
12
 
13
- "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
13
+ "gcc -E -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -o conftest.i"
14
14
  checked program was:
15
15
  /* begin */
16
16
  1: #include <string.h>
@@ -20,7 +20,7 @@ checked program was:
20
20
 
21
21
  have_library: checking for main() in -lc... -------------------- yes
22
22
 
23
- "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
23
+ "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
24
24
  checked program was:
25
25
  /* begin */
26
26
  1: /*top*/
@@ -32,7 +32,7 @@ checked program was:
32
32
 
33
33
  have_func: checking for snprintf() in stdio.h... -------------------- yes
34
34
 
35
- "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lc -lpthread -ldl -lm "
35
+ "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lc -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
36
36
  checked program was:
37
37
  /* begin */
38
38
  1: #include <stdio.h>
@@ -1,6 +1,6 @@
1
1
  have_library: checking for main() in -lc... -------------------- yes
2
2
 
3
- "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
3
+ "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
4
4
  checked program was:
5
5
  /* begin */
6
6
  1: /*top*/
@@ -12,7 +12,7 @@ checked program was:
12
12
 
13
13
  have_library: checking for main() in -lstdc++... -------------------- yes
14
14
 
15
- "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lstdc++ -lc -lpthread -ldl -lm "
15
+ "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lc -lruby-static -lstdc++ -lc -lpthread -ldl -lcrypt -lm -lc"
16
16
  checked program was:
17
17
  /* begin */
18
18
  1: /*top*/
@@ -101,14 +101,15 @@ std::vector<std::string> NWordTagger::execute( const char *text, short max )cons
101
101
  for( short j = 1; (j <= this->nwords) && ((i+j) < words.size()); ++j ) {
102
102
  matched = this->tags.find( match_word );
103
103
  if( matched != this->tags.end() ){
104
- //printf( "word: %d:(%s->%s)\n", i, match_word.c_str(), matched->second.c_str() );
105
104
  std::map<std::string, int>::iterator mloc = matched_tags.find( matched->second );
106
105
  if( mloc == matched_tags.end() ) {
107
106
  matched_tags[matched->second] = 1; // count 1
107
+ //printf( "word: %d:(%s->%s) %d, hits: 1\n", i, match_word.c_str(), matched->second.c_str(), j );
108
108
  }
109
109
  else {
110
110
  mloc->second++;
111
111
  if( max_count < mloc->second ) { max_count = mloc->second; }
112
+ //printf( "word: %d:(%s->%s) %d, hits: %d\n", i, match_word.c_str(), matched->second.c_str(), j, mloc->second );
112
113
  }
113
114
  }
114
115
  // stem each word and compare against our tag bank
@@ -129,11 +130,22 @@ std::vector<std::string> NWordTagger::execute( const char *text, short max )cons
129
130
  }
130
131
  }
131
132
  }
133
+
134
+ std::vector< std::string > reduced_tags;
135
+
136
+ // now we have a list of tags that match within the document text, check if we need to reduce the tags
137
+ if( matched_tags.size() < max ) {
138
+ // prepare the return vector
139
+ for( std::map<std::string, int>::iterator mloc = matched_tags.begin(); mloc != matched_tags.end(); ++mloc ){
140
+ reduced_tags.push_back( mloc->first );
141
+ }
142
+ return reduced_tags;
143
+ }
132
144
 
133
145
  // now that we have all the matched tags reduce to max using the tag frequency as a reduction measure
134
146
  std::vector< std::pair<std::string,int> > sorted_tags;
135
147
 
136
- //printf( "max frequency: %d\n", max_count );
148
+ //printf( "max frequency: %d, total tagged: %d, reducing to %d\n", max_count, matched_tags.size(), max );
137
149
  for( std::map<std::string, int>::iterator mloc = matched_tags.begin(); mloc != matched_tags.end(); ++mloc ){
138
150
  //printf( "word: %s, frequency: %d\n", mloc->first.c_str(), mloc->second );
139
151
  sorted_tags.push_back(*mloc);
@@ -142,14 +154,13 @@ std::vector<std::string> NWordTagger::execute( const char *text, short max )cons
142
154
  // sort the tags in frequency order
143
155
  std::sort( sorted_tags.begin(), sorted_tags.end(), WordComparitor() );
144
156
 
145
- std::vector< std::string > reduced_tags;
146
157
 
147
158
  std::vector< std::pair<std::string, int> >::iterator mloc;
148
159
  do {
149
160
  for(mloc = sorted_tags.begin(); mloc != sorted_tags.end(); ++mloc ) {
150
161
  std::pair< std::string, int > word_freq = *mloc;
151
- // printf( "word: %s, frequency: %d\n", word_freq.first.c_str(), word_freq.second );
152
- //printf( "word: %s, frequency: %d\n", mloc->first.c_str(), mloc->second );
162
+ printf( "word: %s, frequency: %d\n", word_freq.first.c_str(), word_freq.second );
163
+ printf( "word: %s, frequency: %d\n", mloc->first.c_str(), mloc->second );
153
164
  if( word_freq.second < max_count ) {
154
165
  sorted_tags.erase( mloc );
155
166
  break;
@@ -2,7 +2,7 @@ module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- TINY = 8
5
+ TINY = 9
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -32,9 +32,6 @@ The details of her visitation, however, are unclear.
32
32
  Asked by Us if she were happy with the court outcome, Spears (clutching an Ed Hardy purse) smiled and told Us, "Yes."
33
33
  Next up: A status hearing set for July 15.
34
34
  The couple last appeared in court May 6. Spears was granted extended visitation — three days a week from 9 a.m. to 5 p.m. — of Sean Preston, 2, and Jayden James, 20 months.
35
- )
36
- SAMPLE_DOC3=%q(
37
- TMZ.com: Britney celebrated getting overnights with her kids by going on a wild shopping trip for herself.With L.A.'s finest at her service, it was a total clusterf**k outside of Fred Segal as Brit Brit made her way out. The scene was crazy -- and it was all... Read more
38
35
  )
39
36
  def setup
40
37
  if !defined?($tagger)
@@ -137,8 +134,6 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild
137
134
  assert results.include?(["Britney Spears", "NNP", 6])
138
135
  assert results.include?(["Jamie Spears", "NNP", 12])
139
136
  # puts results.inspect
140
- results = tagger.suggest( SAMPLE_DOC3, 5 )
141
- puts results.inspect
142
137
  end
143
138
 
144
139
  private
@@ -16,6 +16,14 @@ class TestWordTagger < Test::Unit::TestCase
16
16
  puts "Duration: #{Time.now - timer} sec"
17
17
  end
18
18
 
19
+ def test_sample_bug
20
+ tags = ["foo", "bar", "baz", "squishy", "yummy"]
21
+ txt = 'This is some sample text. Foo walked into a bar. The bartender said "What can I get you?" Foo said he wanted something yummy - like a baz.'
22
+ tagger = Word::Tagger.new tags, :words => 4
23
+ result_tags = tagger.execute( txt )
24
+ assert_equal ["bar", "baz", "foo", "yummy"], result_tags
25
+ end
26
+
19
27
  def test_ngram_size3
20
28
  timer = Time.now
21
29
  text = "This body of text contains something like ventricular septal defect"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-07-10 00:00:00 -04:00
12
+ date: 2008-08-28 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -28,8 +28,8 @@ email:
28
28
  executables: []
29
29
 
30
30
  extensions:
31
- - ext/rule_tagger/extconf.rb
32
31
  - ext/word_tagger/extconf.rb
32
+ - ext/rule_tagger/extconf.rb
33
33
  extra_rdoc_files:
34
34
  - History.txt
35
35
  - License.txt
@@ -142,8 +142,8 @@ rdoc_options:
142
142
  - README.txt
143
143
  require_paths:
144
144
  - lib
145
- - ext/rule_tagger
146
145
  - ext/word_tagger
146
+ - ext/rule_tagger
147
147
  required_ruby_version: !ruby/object:Gem::Requirement
148
148
  requirements:
149
149
  - - ">="
@@ -164,6 +164,6 @@ signing_key:
164
164
  specification_version: 2
165
165
  summary: A Simple Ruby Rule-Based Part of Speech Tagger
166
166
  test_files:
167
- - test/test_helper.rb
168
167
  - test/test_rule_tagger.rb
168
+ - test/test_helper.rb
169
169
  - test/test_word_tagger.rb