rbtagger 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -18,6 +18,7 @@ ext/rule_tagger/lex.c
18
18
  ext/rule_tagger/lex.h
19
19
  ext/rule_tagger/memory.c
20
20
  ext/rule_tagger/memory.h
21
+ ext/rule_tagger/mkmf.log
21
22
  ext/rule_tagger/rbtagger.c
22
23
  ext/rule_tagger/registry.c
23
24
  ext/rule_tagger/registry.h
@@ -31,12 +32,12 @@ ext/rule_tagger/tagger.h
31
32
  ext/rule_tagger/useful.c
32
33
  ext/rule_tagger/useful.h
33
34
  ext/word_tagger/extconf.rb
35
+ ext/word_tagger/mkmf.log
34
36
  ext/word_tagger/porter_stemmer.c
35
37
  ext/word_tagger/porter_stemmer.h
36
38
  ext/word_tagger/rtagger.cc
37
39
  ext/word_tagger/tagger.cc
38
40
  ext/word_tagger/tagger.h
39
- ext/word_tagger/tagger.rb
40
41
  ext/word_tagger/test.rb
41
42
  ext/word_tagger/test/Makefile
42
43
  ext/word_tagger/test/doc.txt
@@ -44,6 +45,7 @@ ext/word_tagger/test/test.cc
44
45
  lib/brill/tagger.rb
45
46
  lib/rbtagger.rb
46
47
  lib/rbtagger/version.rb
48
+ lib/word/tagger.rb
47
49
  script/console
48
50
  script/destroy
49
51
  script/generate
@@ -51,6 +53,9 @@ script/txt2html
51
53
  setup.rb
52
54
  tasks/deployment.rake
53
55
  tasks/environment.rake
56
+ tasks/extconf.rake
57
+ tasks/extconf/rule_tagger.rake
58
+ tasks/extconf/word_tagger.rake
54
59
  tasks/website.rake
55
60
  test/CONTEXTUALRULEFILE
56
61
  test/LEXICALRULEFILE
@@ -65,8 +70,10 @@ test/docs/doc6.txt
65
70
  test/docs/doc7.txt
66
71
  test/docs/doc8.txt
67
72
  test/docs/doc9.txt
68
- test/tagger_test.rb
73
+ test/fixtures/tags.txt
69
74
  test/test_helper.rb
75
+ test/test_rule_tagger.rb
76
+ test/test_word_tagger.rb
70
77
  tools/rakehelp.rb
71
78
  website/index.html
72
79
  website/index.txt
data/Rakefile CHANGED
@@ -2,32 +2,3 @@ require 'config/requirements'
2
2
  require 'config/hoe' # setup Hoe + all gem configuration
3
3
 
4
4
  Dir['tasks/**/*.rake'].each { |rake| load rake }
5
-
6
- # redefine release
7
-
8
- desc 'Package and upload the release to rubyforge.'
9
- task :release_current => [:clean, :package] do |t|
10
- require 'config/hoe'
11
- version = ENV["VERSION"] or abort "Must supply VERSION=x.y.z"
12
- name = $hoe.name
13
- rubyforge_name = $hoe.rubyforge_name
14
- description = $hoe.description
15
- pkg = "pkg/#{name}-#{version}"
16
- abort "Package doesn't exist => #{pkg}" if !File.exist?(pkg)
17
-
18
- rf = RubyForge.new
19
- puts "Logging in"
20
- rf.login
21
-
22
- c = rf.userconfig
23
- c["release_notes"] = description if description
24
- c["release_changes"] = $hoe.changes if $hoe.changes
25
- c["preformatted"] = true
26
-
27
- files = [(@need_tar ? "#{pkg}.tgz" : nil),
28
- (@need_zip ? "#{pkg}.zip" : nil),
29
- "#{pkg}.gem"].compact
30
-
31
- puts "Releasing #{rubyforge_name} v. #{version}"
32
- rf.add_release 'ruletagger', 'ruletagger', version, *files
33
- end
data/config/hoe.rb CHANGED
@@ -4,7 +4,7 @@ AUTHOR = 'Todd A. Fisher' # can also be an array of Authors
4
4
  EMAIL = 'todd.fisher@gmail.com'
5
5
  DESCRIPTION = "A Simple Ruby Rule-Based Part of Speech Tagger"
6
6
  GEM_NAME = 'rbtagger' # what ppl will type to install your gem
7
- RUBYFORGE_PROJECT = 'ruletagger' # The unix name for your project
7
+ RUBYFORGE_PROJECT = 'rbtagger' # The unix name for your project
8
8
  HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
9
9
  DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
10
10
  EXTRA_DEPENDENCIES = [
@@ -57,7 +57,7 @@ $hoe = Hoe.new(GEM_NAME, VERS) do |p|
57
57
  p.description = DESCRIPTION
58
58
  p.summary = DESCRIPTION
59
59
  p.url = HOMEPATH
60
- p.rubyforge_name = PATH #RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
60
+ p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
61
61
  p.test_globs = ["test/**/test_*.rb"]
62
62
  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] #An array of file patterns to delete on clean.
63
63
 
@@ -0,0 +1,46 @@
1
+ have_header: checking for stdlib.h... -------------------- yes
2
+
3
+ "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
4
+ checked program was:
5
+ /* begin */
6
+ 1: #include <stdlib.h>
7
+ /* end */
8
+
9
+ --------------------
10
+
11
+ have_header: checking for string.h... -------------------- yes
12
+
13
+ "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
14
+ checked program was:
15
+ /* begin */
16
+ 1: #include <string.h>
17
+ /* end */
18
+
19
+ --------------------
20
+
21
+ have_library: checking for main() in -lc... -------------------- yes
22
+
23
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
24
+ checked program was:
25
+ /* begin */
26
+ 1: /*top*/
27
+ 2: int main() { return 0; }
28
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
29
+ /* end */
30
+
31
+ --------------------
32
+
33
+ have_func: checking for snprintf() in stdio.h... -------------------- yes
34
+
35
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lc -lpthread -ldl -lm "
36
+ checked program was:
37
+ /* begin */
38
+ 1: #include <stdio.h>
39
+ 2:
40
+ 3: /*top*/
41
+ 4: int main() { return 0; }
42
+ 5: int t() { void ((*volatile p)()); p = (void ((*)()))snprintf; return 0; }
43
+ /* end */
44
+
45
+ --------------------
46
+
@@ -0,0 +1,24 @@
1
+ have_library: checking for main() in -lc... -------------------- yes
2
+
3
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
4
+ checked program was:
5
+ /* begin */
6
+ 1: /*top*/
7
+ 2: int main() { return 0; }
8
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
9
+ /* end */
10
+
11
+ --------------------
12
+
13
+ have_library: checking for main() in -lstdc++... -------------------- yes
14
+
15
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lstdc++ -lc -lpthread -ldl -lm "
16
+ checked program was:
17
+ /* begin */
18
+ 1: /*top*/
19
+ 2: int main() { return 0; }
20
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
21
+ /* end */
22
+
23
+ --------------------
24
+
@@ -70,10 +70,10 @@ VALUE Tagger_alloc(VALUE klass)
70
70
  return object;
71
71
  }
72
72
 
73
- extern "C" void Init_rtagger()
73
+ extern "C" void Init_word_tagger()
74
74
  {
75
75
  rb_Tagger = rb_define_module( "Tagger" );
76
- rb_NWordTagger = rb_define_class_under( rb_Tagger, "NWordTagger", rb_cObject );
76
+ rb_NWordTagger = rb_define_class_under( rb_Tagger, "WordTagger", rb_cObject );
77
77
 
78
78
  rb_define_alloc_func( rb_NWordTagger, Tagger_alloc );
79
79
 
@@ -33,12 +33,21 @@ static std::vector<std::string> word_split(const std::string& s)
33
33
  return std::vector<std::string>(std::istream_iterator<std::string>(is), std::istream_iterator<std::string>());
34
34
  }
35
35
 
36
+ static void word_downcase( std::string &word )
37
+ {
38
+ for( int j = 0; j < word.size(); ++j ) {
39
+ word[j] = tolower( word[j] );
40
+ }
41
+ }
42
+
36
43
  NWordTagger::NWordTagger()
37
44
  : nwords(2), stemmer(porter_stemmer_new()){
38
45
  }
39
46
  NWordTagger::~NWordTagger(){
40
47
  porter_stemmer_free(stemmer);
41
48
  }
49
+
50
+
42
51
  void NWordTagger::loadTags( const std::set<std::string> &tags )
43
52
  {
44
53
  for( std::set<std::string>::iterator i = tags.begin(); i != tags.end(); ++i ){
@@ -51,14 +60,14 @@ void NWordTagger::loadTags( const std::set<std::string> &tags )
51
60
  stemmed += this->stemWord(words[j]) + " ";
52
61
  }
53
62
  stemmed = stemmed.substr(0,stemmed.length()-1);
54
- this->tags[stemmed] = word;
55
- //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
56
63
  }
57
64
  else{
58
65
  stemmed = this->stemWord(*i);
59
- //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
60
- this->tags[stemmed] = word;
61
66
  }
67
+ // downcase stemmed
68
+ word_downcase( stemmed );
69
+ //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
70
+ this->tags[stemmed] = word;
62
71
 
63
72
  }
64
73
  }
@@ -84,6 +93,7 @@ std::vector<std::string> NWordTagger::execute( const char *text, short max )cons
84
93
 
85
94
  // get the stemmed word at position i
86
95
  match_word = this->stemWord(words[i]);
96
+ word_downcase( match_word );
87
97
 
88
98
  // now scan ahead nwords positions searching our tags table for matches
89
99
  for( short j = 1; (j <= this->nwords) && ((i+j) < words.size()); ++j ) {
@@ -1,8 +1,8 @@
1
1
  module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 0
5
- TINY = 1
4
+ MINOR = 2
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/rbtagger.rb CHANGED
@@ -2,5 +2,12 @@ $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
4
  module RbTagger
5
-
5
+ class << self
6
+ def tags_from_file( file )
7
+ File.read(file).split("\n").map{|t| t.strip}
8
+ end
9
+ end
6
10
  end
11
+
12
+ require 'word/tagger'
13
+ require 'brill/tagger'
@@ -0,0 +1,18 @@
1
+ module Word
2
+ require 'word_tagger'
3
+ class Tagger < Tagger::WordTagger
4
+ def initialize( tags, options = {} )
5
+ if tags.is_a?(String) and File.exist?(tags)
6
+ load_tags( RbTagger.tags_from_file( tags ) )
7
+ else
8
+ load_tags( tags )
9
+ end
10
+ set_words( options[:words] || 2 )
11
+ end
12
+
13
+ def execute( text )
14
+ # strip non alpha characters
15
+ super( text.gsub(/[^\w]/,' ') )
16
+ end
17
+ end
18
+ end
data/script/txt2html CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  GEM_NAME = 'rbtagger' # what ppl will type to install your gem
4
- RUBYFORGE_PROJECT = 'ruletagger'
4
+ RUBYFORGE_PROJECT = 'rbtagger'
5
5
 
6
6
  require 'rubygems'
7
7
  begin
@@ -0,0 +1,43 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll,bundle}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build #{GEM_NAME}."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir(ext) do ruby "extconf.rb" end
31
+ end
32
+
33
+ file ext_so => ext_files do
34
+ Dir.chdir(ext) do
35
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
36
+ if !ok
37
+ require "fileutils"
38
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,43 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll,bundle}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build #{GEM_NAME}."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir(ext) do ruby "extconf.rb" end
31
+ end
32
+
33
+ file ext_so => ext_files do
34
+ Dir.chdir(ext) do
35
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
36
+ if !ok
37
+ require "fileutils"
38
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,18 @@
1
+ namespace :extconf do
2
+ desc "Compiles the Ruby extension"
3
+ task :compile
4
+ end
5
+
6
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
7
+
8
+ task :compile => "extconf:compile" do
9
+ Dir["ext/**/*.{bundle,so,dll}"].each do|lib|
10
+ sh "cp #{lib} lib/"
11
+ end
12
+ end
13
+
14
+ task :test => :compile
15
+
16
+ $hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/word_tagger/Makefile', 'ext/rule_tagger/Makefile']
17
+ $hoe.spec.require_paths = Dir['{lib,ext/*}']
18
+ $hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a