te_rex 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 929e90be95279fcf5093f8d865457509bcc5e692
4
- data.tar.gz: 98e1e7ad046d7e3f423934187b0df0546a6500a0
3
+ metadata.gz: eba4b0e904897695a6ffe59f676cd41129d9f9d3
4
+ data.tar.gz: b9714ec30ca138fed335894c95503c113c8e605d
5
5
  SHA512:
6
- metadata.gz: ad648071a76d79757ecdbc793a31753bdfdf94f6c0e8b462b9f315c8d514e9fdc07c942d2adae49dc20c7d35911554ff48c7f41c6804cf62c1727c16542d897a
7
- data.tar.gz: a6397c1ea3b21735813c8f2cd739b889720d08746a5ea1903db1976890ea2c9eaac2e316ea886b2d6305c1e8ef3e02053c560b62dacaa3cfc5f0390094989893
6
+ metadata.gz: 2f0e6f419f099c3f485c1b829d1e5421935092af35fc2184fdd9a70de0f7127cbf95bcfc100e6e8f0d0efc7eeda15f4b8cc5f696211c852a92933220ae2894ff
7
+ data.tar.gz: 3f017ed552f0fdcb0ba89a0ffcc188f76c13780571b67ca4edd254980605ff5166af536610da2d3b2570b8bb5c39a3b1e0dc57d1873c1de89b370447b2426859
@@ -2,10 +2,11 @@ module TeRex
2
2
  module Format
3
3
  class BasicFile
4
4
 
5
- attr_reader :sentences, :path
5
+ attr_reader :sentences, :path, :category
6
6
 
7
- def initialize(file_path)
7
+ def initialize(file_path, klass)
8
8
  @path = file_path
9
+ @category = klass
9
10
  end
10
11
 
11
12
  # Each line of file with Array object,
@@ -2,10 +2,11 @@ module TeRex
2
2
  module Format
3
3
  class BrownFile
4
4
 
5
- attr_accessor :sentences
5
+ attr_reader :sentences, :path, :category
6
6
 
7
- def initialize(file_path)
7
+ def initialize(file_path, klass)
8
8
  @path = file_path
9
+ @category = klass
9
10
  end
10
11
 
11
12
  # Each line of file with Array object,
@@ -3,12 +3,13 @@ module TeRex
3
3
  require 'csv'
4
4
  class ErrorFile
5
5
 
6
- attr_reader :sentences, :path
6
+ attr_reader :sentences, :path, :category
7
7
 
8
8
  @@csv_conf = {:headers => true}
9
9
 
10
- def initialize(file_path)
10
+ def initialize(file_path, klass)
11
11
  @path = file_path
12
+ @category = klass
12
13
  end
13
14
 
14
15
  # Each row of csv as Array object, strip it and return
data/lib/format/format.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module TeRex
2
2
  module Format
3
3
  def self.category_term(t)
4
- t.capitalize.intern
4
+ t.intern
5
5
  end
6
6
  end
7
7
  end
data/lib/te_rex/bayes.rb CHANGED
@@ -5,8 +5,7 @@ module TeRex
5
5
  module Classifier
6
6
  class Bayes
7
7
 
8
- attr_accessor :category_counts, :total_words
9
- attr_reader :messages
8
+ attr_reader :category_counts, :total_words, :messages
10
9
 
11
10
  # categories = [{:tag => "Thing1", :msg => "Thing1 message"}, {:tag => "Thing2", :msg => "Thing2 message"}]
12
11
  # initialize({:tag => "Refund", :msg => "You'll get a refund"}, {:tag => "Nonrefund", :msg => "You won't get a refund"})
data/lib/te_rex/corpus.rb CHANGED
@@ -30,18 +30,24 @@ module TeRex
30
30
  end
31
31
 
32
32
  def sentence_partition
33
- #super_set = build_superset
34
33
  corpus_set = partition_files_for_sentences
35
- #@sample_size = (superset.count.to_f * 0.75).round
36
- @sample_size = 0.0
37
34
  @training = partition_training_by_sentence(corpus_set)
38
35
  @testing = partition_test_by_sentence(corpus_set)
39
- count_all
36
+ c = count_all
37
+ @sample_size = (c.to_f * 0.75)
38
+ c
39
+ end
40
+
41
+ def build_superset
42
+ @set.reduce([]) do |memo,formatter|
43
+ memo << formatter.sentences
44
+ end.flatten
40
45
  end
41
46
 
47
+ private
42
48
  def define_set
43
49
  @set ||= Dir[@glob].map do |file|
44
- @format_klass.new(file)
50
+ @format_klass.new(file, @category_klass)
45
51
  end
46
52
  @set
47
53
  end
@@ -72,12 +78,6 @@ module TeRex
72
78
  c_set.sample(c_set.count * 0.25)
73
79
  end
74
80
 
75
- def build_superset
76
- @set.reduce([]) do |memo,formatter|
77
- memo << formatter.sentences
78
- end.flatten
79
- end
80
-
81
81
  def count_all
82
82
  counter = 0
83
83
  @set.map{|f| counter += f.sentences.count}
@@ -1,3 +1,3 @@
1
1
  module TeRex
2
- VERSION = "0.0.11"
2
+ VERSION = "0.0.12"
3
3
  end
data/test/corpus_test.rb CHANGED
@@ -48,6 +48,11 @@ class CorpusTest < MicroTest::Test
48
48
  assert (22...27).map{|i| i}.include?((ratio * 100).to_i)
49
49
  end
50
50
 
51
+ test "sample size equals size of training set" do
52
+ sample = @@sent_corpus.total_sentences.to_f * 0.75
53
+ assert @@sent_corpus.sample_size == sample
54
+ end
55
+
51
56
  test "sentence counts are correct" do
52
57
  assert @@sent_corpus.set.count == 3
53
58
  assert @@sent_corpus.training.count == 9
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: te_rex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joshua Bowles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-13 00:00:00.000000000 Z
11
+ date: 2014-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast-stemmer