te_rex 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 929e90be95279fcf5093f8d865457509bcc5e692
4
- data.tar.gz: 98e1e7ad046d7e3f423934187b0df0546a6500a0
3
+ metadata.gz: eba4b0e904897695a6ffe59f676cd41129d9f9d3
4
+ data.tar.gz: b9714ec30ca138fed335894c95503c113c8e605d
5
5
  SHA512:
6
- metadata.gz: ad648071a76d79757ecdbc793a31753bdfdf94f6c0e8b462b9f315c8d514e9fdc07c942d2adae49dc20c7d35911554ff48c7f41c6804cf62c1727c16542d897a
7
- data.tar.gz: a6397c1ea3b21735813c8f2cd739b889720d08746a5ea1903db1976890ea2c9eaac2e316ea886b2d6305c1e8ef3e02053c560b62dacaa3cfc5f0390094989893
6
+ metadata.gz: 2f0e6f419f099c3f485c1b829d1e5421935092af35fc2184fdd9a70de0f7127cbf95bcfc100e6e8f0d0efc7eeda15f4b8cc5f696211c852a92933220ae2894ff
7
+ data.tar.gz: 3f017ed552f0fdcb0ba89a0ffcc188f76c13780571b67ca4edd254980605ff5166af536610da2d3b2570b8bb5c39a3b1e0dc57d1873c1de89b370447b2426859
@@ -2,10 +2,11 @@ module TeRex
2
2
  module Format
3
3
  class BasicFile
4
4
 
5
- attr_reader :sentences, :path
5
+ attr_reader :sentences, :path, :category
6
6
 
7
- def initialize(file_path)
7
+ def initialize(file_path, klass)
8
8
  @path = file_path
9
+ @category = klass
9
10
  end
10
11
 
11
12
  # Each line of file with Array object,
@@ -2,10 +2,11 @@ module TeRex
2
2
  module Format
3
3
  class BrownFile
4
4
 
5
- attr_accessor :sentences
5
+ attr_reader :sentences, :path, :category
6
6
 
7
- def initialize(file_path)
7
+ def initialize(file_path, klass)
8
8
  @path = file_path
9
+ @category = klass
9
10
  end
10
11
 
11
12
  # Each line of file with Array object,
@@ -3,12 +3,13 @@ module TeRex
3
3
  require 'csv'
4
4
  class ErrorFile
5
5
 
6
- attr_reader :sentences, :path
6
+ attr_reader :sentences, :path, :category
7
7
 
8
8
  @@csv_conf = {:headers => true}
9
9
 
10
- def initialize(file_path)
10
+ def initialize(file_path, klass)
11
11
  @path = file_path
12
+ @category = klass
12
13
  end
13
14
 
14
15
  # Each row of csv as Array object, strip it and return
data/lib/format/format.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module TeRex
2
2
  module Format
3
3
  def self.category_term(t)
4
- t.capitalize.intern
4
+ t.intern
5
5
  end
6
6
  end
7
7
  end
data/lib/te_rex/bayes.rb CHANGED
@@ -5,8 +5,7 @@ module TeRex
5
5
  module Classifier
6
6
  class Bayes
7
7
 
8
- attr_accessor :category_counts, :total_words
9
- attr_reader :messages
8
+ attr_reader :category_counts, :total_words, :messages
10
9
 
11
10
  # categories = [{:tag => "Thing1", :msg => "Thing1 message"}, {:tag => "Thing2", :msg => "Thing2 message"}]
12
11
  # initialize({:tag => "Refund", :msg => "You'll get a refund"}, {:tag => "Nonrefund", :msg => "You won't get a refund"})
data/lib/te_rex/corpus.rb CHANGED
@@ -30,18 +30,24 @@ module TeRex
30
30
  end
31
31
 
32
32
  def sentence_partition
33
- #super_set = build_superset
34
33
  corpus_set = partition_files_for_sentences
35
- #@sample_size = (superset.count.to_f * 0.75).round
36
- @sample_size = 0.0
37
34
  @training = partition_training_by_sentence(corpus_set)
38
35
  @testing = partition_test_by_sentence(corpus_set)
39
- count_all
36
+ c = count_all
37
+ @sample_size = (c.to_f * 0.75)
38
+ c
39
+ end
40
+
41
+ def build_superset
42
+ @set.reduce([]) do |memo,formatter|
43
+ memo << formatter.sentences
44
+ end.flatten
40
45
  end
41
46
 
47
+ private
42
48
  def define_set
43
49
  @set ||= Dir[@glob].map do |file|
44
- @format_klass.new(file)
50
+ @format_klass.new(file, @category_klass)
45
51
  end
46
52
  @set
47
53
  end
@@ -72,12 +78,6 @@ module TeRex
72
78
  c_set.sample(c_set.count * 0.25)
73
79
  end
74
80
 
75
- def build_superset
76
- @set.reduce([]) do |memo,formatter|
77
- memo << formatter.sentences
78
- end.flatten
79
- end
80
-
81
81
  def count_all
82
82
  counter = 0
83
83
  @set.map{|f| counter += f.sentences.count}
@@ -1,3 +1,3 @@
1
1
  module TeRex
2
- VERSION = "0.0.11"
2
+ VERSION = "0.0.12"
3
3
  end
data/test/corpus_test.rb CHANGED
@@ -48,6 +48,11 @@ class CorpusTest < MicroTest::Test
48
48
  assert (22...27).map{|i| i}.include?((ratio * 100).to_i)
49
49
  end
50
50
 
51
+ test "sample size equals size of training set" do
52
+ sample = @@sent_corpus.total_sentences.to_f * 0.75
53
+ assert @@sent_corpus.sample_size == sample
54
+ end
55
+
51
56
  test "sentence counts are correct" do
52
57
  assert @@sent_corpus.set.count == 3
53
58
  assert @@sent_corpus.training.count == 9
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: te_rex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joshua Bowles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-13 00:00:00.000000000 Z
11
+ date: 2014-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast-stemmer