rbtagger 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -18,6 +18,7 @@ ext/rule_tagger/lex.c
18
18
  ext/rule_tagger/lex.h
19
19
  ext/rule_tagger/memory.c
20
20
  ext/rule_tagger/memory.h
21
+ ext/rule_tagger/mkmf.log
21
22
  ext/rule_tagger/rbtagger.c
22
23
  ext/rule_tagger/registry.c
23
24
  ext/rule_tagger/registry.h
@@ -31,12 +32,12 @@ ext/rule_tagger/tagger.h
31
32
  ext/rule_tagger/useful.c
32
33
  ext/rule_tagger/useful.h
33
34
  ext/word_tagger/extconf.rb
35
+ ext/word_tagger/mkmf.log
34
36
  ext/word_tagger/porter_stemmer.c
35
37
  ext/word_tagger/porter_stemmer.h
36
38
  ext/word_tagger/rtagger.cc
37
39
  ext/word_tagger/tagger.cc
38
40
  ext/word_tagger/tagger.h
39
- ext/word_tagger/tagger.rb
40
41
  ext/word_tagger/test.rb
41
42
  ext/word_tagger/test/Makefile
42
43
  ext/word_tagger/test/doc.txt
@@ -44,6 +45,7 @@ ext/word_tagger/test/test.cc
44
45
  lib/brill/tagger.rb
45
46
  lib/rbtagger.rb
46
47
  lib/rbtagger/version.rb
48
+ lib/word/tagger.rb
47
49
  script/console
48
50
  script/destroy
49
51
  script/generate
@@ -51,6 +53,9 @@ script/txt2html
51
53
  setup.rb
52
54
  tasks/deployment.rake
53
55
  tasks/environment.rake
56
+ tasks/extconf.rake
57
+ tasks/extconf/rule_tagger.rake
58
+ tasks/extconf/word_tagger.rake
54
59
  tasks/website.rake
55
60
  test/CONTEXTUALRULEFILE
56
61
  test/LEXICALRULEFILE
@@ -65,8 +70,10 @@ test/docs/doc6.txt
65
70
  test/docs/doc7.txt
66
71
  test/docs/doc8.txt
67
72
  test/docs/doc9.txt
68
- test/tagger_test.rb
73
+ test/fixtures/tags.txt
69
74
  test/test_helper.rb
75
+ test/test_rule_tagger.rb
76
+ test/test_word_tagger.rb
70
77
  tools/rakehelp.rb
71
78
  website/index.html
72
79
  website/index.txt
data/Rakefile CHANGED
@@ -2,32 +2,3 @@ require 'config/requirements'
2
2
  require 'config/hoe' # setup Hoe + all gem configuration
3
3
 
4
4
  Dir['tasks/**/*.rake'].each { |rake| load rake }
5
-
6
- # redefine release
7
-
8
- desc 'Package and upload the release to rubyforge.'
9
- task :release_current => [:clean, :package] do |t|
10
- require 'config/hoe'
11
- version = ENV["VERSION"] or abort "Must supply VERSION=x.y.z"
12
- name = $hoe.name
13
- rubyforge_name = $hoe.rubyforge_name
14
- description = $hoe.description
15
- pkg = "pkg/#{name}-#{version}"
16
- abort "Package doesn't exist => #{pkg}" if !File.exist?(pkg)
17
-
18
- rf = RubyForge.new
19
- puts "Logging in"
20
- rf.login
21
-
22
- c = rf.userconfig
23
- c["release_notes"] = description if description
24
- c["release_changes"] = $hoe.changes if $hoe.changes
25
- c["preformatted"] = true
26
-
27
- files = [(@need_tar ? "#{pkg}.tgz" : nil),
28
- (@need_zip ? "#{pkg}.zip" : nil),
29
- "#{pkg}.gem"].compact
30
-
31
- puts "Releasing #{rubyforge_name} v. #{version}"
32
- rf.add_release 'ruletagger', 'ruletagger', version, *files
33
- end
data/config/hoe.rb CHANGED
@@ -4,7 +4,7 @@ AUTHOR = 'Todd A. Fisher' # can also be an array of Authors
4
4
  EMAIL = 'todd.fisher@gmail.com'
5
5
  DESCRIPTION = "A Simple Ruby Rule-Based Part of Speech Tagger"
6
6
  GEM_NAME = 'rbtagger' # what ppl will type to install your gem
7
- RUBYFORGE_PROJECT = 'ruletagger' # The unix name for your project
7
+ RUBYFORGE_PROJECT = 'rbtagger' # The unix name for your project
8
8
  HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
9
9
  DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
10
10
  EXTRA_DEPENDENCIES = [
@@ -57,7 +57,7 @@ $hoe = Hoe.new(GEM_NAME, VERS) do |p|
57
57
  p.description = DESCRIPTION
58
58
  p.summary = DESCRIPTION
59
59
  p.url = HOMEPATH
60
- p.rubyforge_name = PATH #RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
60
+ p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
61
61
  p.test_globs = ["test/**/test_*.rb"]
62
62
  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] #An array of file patterns to delete on clean.
63
63
 
@@ -0,0 +1,46 @@
1
+ have_header: checking for stdlib.h... -------------------- yes
2
+
3
+ "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
4
+ checked program was:
5
+ /* begin */
6
+ 1: #include <stdlib.h>
7
+ /* end */
8
+
9
+ --------------------
10
+
11
+ have_header: checking for string.h... -------------------- yes
12
+
13
+ "gcc -E -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -Os -pipe -fno-common conftest.c -o conftest.i"
14
+ checked program was:
15
+ /* begin */
16
+ 1: #include <string.h>
17
+ /* end */
18
+
19
+ --------------------
20
+
21
+ have_library: checking for main() in -lc... -------------------- yes
22
+
23
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
24
+ checked program was:
25
+ /* begin */
26
+ 1: /*top*/
27
+ 2: int main() { return 0; }
28
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
29
+ /* end */
30
+
31
+ --------------------
32
+
33
+ have_func: checking for snprintf() in stdio.h... -------------------- yes
34
+
35
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lc -lpthread -ldl -lm "
36
+ checked program was:
37
+ /* begin */
38
+ 1: #include <stdio.h>
39
+ 2:
40
+ 3: /*top*/
41
+ 4: int main() { return 0; }
42
+ 5: int t() { void ((*volatile p)()); p = (void ((*)()))snprintf; return 0; }
43
+ /* end */
44
+
45
+ --------------------
46
+
@@ -0,0 +1,24 @@
1
+ have_library: checking for main() in -lc... -------------------- yes
2
+
3
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lruby -lc -lpthread -ldl -lm "
4
+ checked program was:
5
+ /* begin */
6
+ 1: /*top*/
7
+ 2: int main() { return 0; }
8
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
9
+ /* end */
10
+
11
+ --------------------
12
+
13
+ have_library: checking for main() in -lstdc++... -------------------- yes
14
+
15
+ "gcc -o conftest -I. -I/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0 -I. -arch ppc -arch i386 -Os -pipe -fno-common conftest.c -L"." -L"/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib" -L. -arch ppc -arch i386 -lc -lruby -lstdc++ -lc -lpthread -ldl -lm "
16
+ checked program was:
17
+ /* begin */
18
+ 1: /*top*/
19
+ 2: int main() { return 0; }
20
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
21
+ /* end */
22
+
23
+ --------------------
24
+
@@ -70,10 +70,10 @@ VALUE Tagger_alloc(VALUE klass)
70
70
  return object;
71
71
  }
72
72
 
73
- extern "C" void Init_rtagger()
73
+ extern "C" void Init_word_tagger()
74
74
  {
75
75
  rb_Tagger = rb_define_module( "Tagger" );
76
- rb_NWordTagger = rb_define_class_under( rb_Tagger, "NWordTagger", rb_cObject );
76
+ rb_NWordTagger = rb_define_class_under( rb_Tagger, "WordTagger", rb_cObject );
77
77
 
78
78
  rb_define_alloc_func( rb_NWordTagger, Tagger_alloc );
79
79
 
@@ -33,12 +33,21 @@ static std::vector<std::string> word_split(const std::string& s)
33
33
  return std::vector<std::string>(std::istream_iterator<std::string>(is), std::istream_iterator<std::string>());
34
34
  }
35
35
 
36
+ static void word_downcase( std::string &word )
37
+ {
38
+ for( int j = 0; j < word.size(); ++j ) {
39
+ word[j] = tolower( word[j] );
40
+ }
41
+ }
42
+
36
43
  NWordTagger::NWordTagger()
37
44
  : nwords(2), stemmer(porter_stemmer_new()){
38
45
  }
39
46
  NWordTagger::~NWordTagger(){
40
47
  porter_stemmer_free(stemmer);
41
48
  }
49
+
50
+
42
51
  void NWordTagger::loadTags( const std::set<std::string> &tags )
43
52
  {
44
53
  for( std::set<std::string>::iterator i = tags.begin(); i != tags.end(); ++i ){
@@ -51,14 +60,14 @@ void NWordTagger::loadTags( const std::set<std::string> &tags )
51
60
  stemmed += this->stemWord(words[j]) + " ";
52
61
  }
53
62
  stemmed = stemmed.substr(0,stemmed.length()-1);
54
- this->tags[stemmed] = word;
55
- //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
56
63
  }
57
64
  else{
58
65
  stemmed = this->stemWord(*i);
59
- //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
60
- this->tags[stemmed] = word;
61
66
  }
67
+ // downcase stemmed
68
+ word_downcase( stemmed );
69
+ //printf( "word: %s -> %s\n", word.c_str(), stemmed.c_str() );
70
+ this->tags[stemmed] = word;
62
71
 
63
72
  }
64
73
  }
@@ -84,6 +93,7 @@ std::vector<std::string> NWordTagger::execute( const char *text, short max )cons
84
93
 
85
94
  // get the stemmed word at position i
86
95
  match_word = this->stemWord(words[i]);
96
+ word_downcase( match_word );
87
97
 
88
98
  // now scan ahead nwords positions searching our tags table for matches
89
99
  for( short j = 1; (j <= this->nwords) && ((i+j) < words.size()); ++j ) {
@@ -1,8 +1,8 @@
1
1
  module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 0
5
- TINY = 1
4
+ MINOR = 2
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/rbtagger.rb CHANGED
@@ -2,5 +2,12 @@ $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
4
  module RbTagger
5
-
5
+ class << self
6
+ def tags_from_file( file )
7
+ File.read(file).split("\n").map{|t| t.strip}
8
+ end
9
+ end
6
10
  end
11
+
12
+ require 'word/tagger'
13
+ require 'brill/tagger'
@@ -0,0 +1,18 @@
1
+ module Word
2
+ require 'word_tagger'
3
+ class Tagger < Tagger::WordTagger
4
+ def initialize( tags, options = {} )
5
+ if tags.is_a?(String) and File.exist?(tags)
6
+ load_tags( RbTagger.tags_from_file( tags ) )
7
+ else
8
+ load_tags( tags )
9
+ end
10
+ set_words( options[:words] || 2 )
11
+ end
12
+
13
+ def execute( text )
14
+ # strip non alpha characters
15
+ super( text.gsub(/[^\w]/,' ') )
16
+ end
17
+ end
18
+ end
data/script/txt2html CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  GEM_NAME = 'rbtagger' # what ppl will type to install your gem
4
- RUBYFORGE_PROJECT = 'ruletagger'
4
+ RUBYFORGE_PROJECT = 'rbtagger'
5
5
 
6
6
  require 'rubygems'
7
7
  begin
@@ -0,0 +1,43 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll,bundle}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build #{GEM_NAME}."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir(ext) do ruby "extconf.rb" end
31
+ end
32
+
33
+ file ext_so => ext_files do
34
+ Dir.chdir(ext) do
35
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
36
+ if !ok
37
+ require "fileutils"
38
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,43 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll,bundle}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build #{GEM_NAME}."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir(ext) do ruby "extconf.rb" end
31
+ end
32
+
33
+ file ext_so => ext_files do
34
+ Dir.chdir(ext) do
35
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
36
+ if !ok
37
+ require "fileutils"
38
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,18 @@
1
+ namespace :extconf do
2
+ desc "Compiles the Ruby extension"
3
+ task :compile
4
+ end
5
+
6
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
7
+
8
+ task :compile => "extconf:compile" do
9
+ Dir["ext/**/*.{bundle,so,dll}"].each do|lib|
10
+ sh "cp #{lib} lib/"
11
+ end
12
+ end
13
+
14
+ task :test => :compile
15
+
16
+ $hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/word_tagger/Makefile', 'ext/rule_tagger/Makefile']
17
+ $hoe.spec.require_paths = Dir['{lib,ext/*}']
18
+ $hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a