RubyGems - te_rex - Versions diffs - 0.0.11 → 0.0.12 - Mend

te_rex 0.0.11 → 0.0.12

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 929e90be95279fcf5093f8d865457509bcc5e692
-  data.tar.gz: 98e1e7ad046d7e3f423934187b0df0546a6500a0
+  metadata.gz: eba4b0e904897695a6ffe59f676cd41129d9f9d3
+  data.tar.gz: b9714ec30ca138fed335894c95503c113c8e605d
 SHA512:
-  metadata.gz: ad648071a76d79757ecdbc793a31753bdfdf94f6c0e8b462b9f315c8d514e9fdc07c942d2adae49dc20c7d35911554ff48c7f41c6804cf62c1727c16542d897a
-  data.tar.gz: a6397c1ea3b21735813c8f2cd739b889720d08746a5ea1903db1976890ea2c9eaac2e316ea886b2d6305c1e8ef3e02053c560b62dacaa3cfc5f0390094989893
+  metadata.gz: 2f0e6f419f099c3f485c1b829d1e5421935092af35fc2184fdd9a70de0f7127cbf95bcfc100e6e8f0d0efc7eeda15f4b8cc5f696211c852a92933220ae2894ff
+  data.tar.gz: 3f017ed552f0fdcb0ba89a0ffcc188f76c13780571b67ca4edd254980605ff5166af536610da2d3b2570b8bb5c39a3b1e0dc57d1873c1de89b370447b2426859

data/lib/format/basic_file.rb CHANGED Viewed

@@ -2,10 +2,11 @@ module TeRex
   module Format
     class BasicFile
-      attr_reader :sentences, :path
+      attr_reader :sentences, :path, :category
-      def initialize(file_path)
+      def initialize(file_path, klass)
         @path = file_path
+        @category = klass
       end
       # Each line of file with Array object,

data/lib/format/brown_file.rb CHANGED Viewed

@@ -2,10 +2,11 @@ module TeRex
   module Format
     class BrownFile
-      attr_accessor :sentences
+      attr_reader :sentences, :path, :category
-      def initialize(file_path)
+      def initialize(file_path, klass)
         @path = file_path
+        @category = klass
       end
       # Each line of file with Array object,

data/lib/format/error_file.rb CHANGED Viewed

@@ -3,12 +3,13 @@ module TeRex
     require 'csv'
     class ErrorFile
-      attr_reader :sentences, :path
+      attr_reader :sentences, :path, :category
       @@csv_conf = {:headers => true}
-      def initialize(file_path)
+      def initialize(file_path, klass)
         @path = file_path
+        @category = klass
       end
       # Each row of csv as Array object, strip it and return

data/lib/format/format.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module TeRex
   module Format
     def self.category_term(t)
-      t.capitalize.intern
+      t.intern
     end
   end
 end

data/lib/te_rex/bayes.rb CHANGED Viewed

@@ -5,8 +5,7 @@ module TeRex
   module Classifier
     class Bayes
-      attr_accessor :category_counts, :total_words
-      attr_reader :messages
+      attr_reader :category_counts, :total_words, :messages
       # categories = [{:tag => "Thing1", :msg => "Thing1 message"}, {:tag => "Thing2", :msg => "Thing2 message"}]
       # initialize({:tag => "Refund", :msg => "You'll get a refund"}, {:tag => "Nonrefund", :msg => "You won't get a refund"})

data/lib/te_rex/corpus.rb CHANGED Viewed

@@ -30,18 +30,24 @@ module TeRex
       end
       def sentence_partition
-        #super_set = build_superset
         corpus_set = partition_files_for_sentences
-        #@sample_size = (superset.count.to_f * 0.75).round
-        @sample_size = 0.0
         @training = partition_training_by_sentence(corpus_set)
         @testing = partition_test_by_sentence(corpus_set)
-        count_all
+        c = count_all
+        @sample_size = (c.to_f * 0.75)
+        c
+      end
+      def build_superset
+        @set.reduce([]) do |memo,formatter|
+          memo << formatter.sentences
+        end.flatten
       end
+      private
       def define_set
         @set ||= Dir[@glob].map do |file|
-          @format_klass.new(file)
+          @format_klass.new(file, @category_klass)
         end
         @set
       end
@@ -72,12 +78,6 @@ module TeRex
         c_set.sample(c_set.count * 0.25)
       end
-      def build_superset
-        @set.reduce([]) do |memo,formatter|
-          memo << formatter.sentences
-        end.flatten
-      end
       def count_all
         counter = 0
         @set.map{|f| counter += f.sentences.count}

data/lib/te_rex/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TeRex
-  VERSION = "0.0.11"
+  VERSION = "0.0.12"
 end

data/test/corpus_test.rb CHANGED Viewed

@@ -48,6 +48,11 @@ class CorpusTest < MicroTest::Test
     assert (22...27).map{|i| i}.include?((ratio * 100).to_i)
   end
+  test "sample size equals size of training set" do
+    sample = @@sent_corpus.total_sentences.to_f * 0.75
+    assert @@sent_corpus.sample_size == sample
+  end
   test "sentence counts are correct" do
     assert @@sent_corpus.set.count == 3
     assert @@sent_corpus.training.count == 9

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: te_rex
 version: !ruby/object:Gem::Version
-  version: 0.0.11
+  version: 0.0.12
 platform: ruby
 authors:
 - Joshua Bowles
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-13 00:00:00.000000000 Z
+date: 2014-11-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fast-stemmer