RubyGems - rbtagger - Versions diffs - 0.4.6 → 0.4.7 - Mend

rbtagger 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/Rakefile CHANGED

@@ -1,6 +1,9 @@
 require 'rake/clean'
 require 'rake/testtask'
 require 'rake/rdoctask'
+$:.unshift File.expand_path(File.dirname(__FILE__))
+$:.unshift File.expand_path(File.dirname(__FILE__), 'lib')
+$:.unshift File.expand_path(File.dirname(__FILE__), 'ext')
 CLEAN.include '**/*.o'
 CLEAN.include "**/*.#{Config::MAKEFILE_CONFIG['DLEXT']}"

data/lib/brill/tagger.rb CHANGED

@@ -1,3 +1,5 @@
+# encoding: utf-8
 require 'rule_tagger/rule_tagger'
 module Brill
@@ -29,7 +31,7 @@ module Brill
     # see: http://cpansearch.perl.org/src/ACOBURN/Lingua-EN-Tagger-0.15/Tagger.pm
     def noun_phrases(text)
       # ?:$PREP|$DET|$NUM)
-      #
+      #
       tags = tag(text.gsub(/[^\w]/,' '))
       phrases = []
       phrase = []
@@ -62,7 +64,7 @@ module Brill
       # join NNP's together for names
       reduced_tags = []
       mappings = {} # keep a mapping of the joined words to expand
-      tags.each{|tag|
+      tags.each{|tag|
         if ptag.last == 'NNP' and tag.last == 'NNP' and !ptag.first.match(/\.$/)
           ptag[0] += " " + tag.first
           # before combining these two create a mapping for each word to each word
@@ -119,7 +121,7 @@ module Brill
     end
     # Tag a body of text
-    # returns an array like [[token,tag],[token,tag]...[token,tag]]
+    # returns an array like [[token,tag],[token,tag]...[token,tag]]
     #
     def tag( text )
       # XXX: the list of contractions is much larger then this... find'em
@@ -130,14 +132,14 @@ module Brill
       @tagger.apply_lexical_rules( tokens, tags, [], 0 )
       @tagger.default_tag_finish( tokens, tags )
       # Brill uses these fake "STAART" tags to delimit the start & end of sentence.
-      tokens << "STAART"
-      tokens << "STAART"
+      tokens << "STAART"
+      tokens << "STAART"
       tokens.unshift "STAART"
       tokens.unshift "STAART"
-      tags << "STAART"
-      tags << "STAART"
+      tags << "STAART"
+      tags << "STAART"
       tags.unshift "STAART"
       tags.unshift "STAART"
@@ -166,7 +168,7 @@ module Brill
       end
       lines
     end
-    # load LEXICON
+    # load LEXICON
     def self.load_lexicon(tagger,lexicon)
       lines = Brill::Tagger.lines(lexicon)
       i = 0
@@ -187,7 +189,7 @@ module Brill
       end
     end
-    # load LEXICALRULEFILE
+    # load LEXICALRULEFILE
     def self.load_lexical_rules(tagger,rules)
       lines = self.lines(rules)
       i = 0
@@ -273,7 +275,7 @@ module Brill
       # Isolate any embedded punctuation chars
       #   s{([,;:\@\#\$\%&])} { $1 }g;
       text.gsub!(/([,;:\@\#\$\%&])/, ' \1 ')
       # Assume sentence tokenization has been done first, so split FINAL
       # periods only.
       # s/ ([^.]) \.  ([\]\)\}\>\"\']*) [ \t]* $ /$1 .$2 /gx;
@@ -329,7 +331,7 @@ module Brill
       text.gsub!(/ (\'t)(is|was) /i,' \1 \2 ')
       #s/ (wan)(na) / $1 $2 /ig;
       text.gsub!(/ (wan)(na) /i,' \1 \2 ')
       text.split(/\s/)
     end

data/lib/rbtagger.rb CHANGED

@@ -1,3 +1,4 @@
+# encoding: utf-8
 module RbTagger
   class << self
     def tags_from_file( file )

data/lib/rbtagger/version.rb CHANGED

@@ -1,8 +1,9 @@
+# encoding: utf-8
 module RbTagger #:nodoc:
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 4
-    TINY  = 6
+    TINY  = 7
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/lib/word/tagger.rb CHANGED

@@ -1,3 +1,4 @@
+# encoding: utf-8
 require 'word_tagger/word_tagger'
 module Word

metadata CHANGED

@@ -1,7 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rbtagger
 version: !ruby/object:Gem::Version
-  version: 0.4.6
+  hash: 1
+  prerelease: false
+  segments:
+  - 0
+  - 4
+  - 7
+  version: 0.4.7
 platform: ruby
 authors:
 - Todd A. Fisher
@@ -9,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-01-25 00:00:00 -05:00
+date: 2011-03-30 00:00:00 -04:00
 default_executable:
 dependencies: []
@@ -76,21 +82,27 @@ require_paths:
 - lib
 - ext
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
       version: "0"
-  version:
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
       version: "0"
-  version:
 requirements: []
 rubyforge_project: curb
-rubygems_version: 1.3.5
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
 summary: Ruby libcurl bindings