thera 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. data/.document +5 -0
  2. data/.gitignore +56 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +20 -0
  5. data/LICENSE.txt +1 -0
  6. data/README.rdoc +8 -0
  7. data/Rakefile +1 -0
  8. data/ext/Makefile +225 -0
  9. data/ext/extconf.rb +29 -0
  10. data/ext/quarry/quarry_toolkit.cpp +148 -0
  11. data/lib/quarry/Makefile.linux +2 -0
  12. data/lib/quarry/Makefile.osx +6 -0
  13. data/lib/quarry/Makefile.targets +23 -0
  14. data/lib/quarry/obj/.gitkeep +0 -0
  15. data/lib/quarry/src/classifier/aode/aode_classifier.cpp +0 -0
  16. data/lib/quarry/src/classifier/aode/aode_classifier.h +0 -0
  17. data/lib/quarry/src/classifier/centroid/centroid_classifier.cpp +0 -0
  18. data/lib/quarry/src/classifier/centroid/centroid_classifier.h +0 -0
  19. data/lib/quarry/src/classifier/classifier.cpp +32 -0
  20. data/lib/quarry/src/classifier/classifier.h +59 -0
  21. data/lib/quarry/src/classifier/knn/knn_classifier.cpp +0 -0
  22. data/lib/quarry/src/classifier/knn/knn_classifier.h +0 -0
  23. data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.cpp +40 -0
  24. data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.h +18 -0
  25. data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.cpp +80 -0
  26. data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.h +52 -0
  27. data/lib/quarry/src/data_set/data_set.cpp +130 -0
  28. data/lib/quarry/src/data_set/data_set.h +78 -0
  29. data/lib/quarry/src/data_set/dense/dense_data_set.h +39 -0
  30. data/lib/quarry/src/data_set/dense/dense_example.h +44 -0
  31. data/lib/quarry/src/data_set/example.cpp +10 -0
  32. data/lib/quarry/src/data_set/example.h +23 -0
  33. data/lib/quarry/src/data_set/feature.h +36 -0
  34. data/lib/quarry/src/data_set/features/nominal_feature.cpp +57 -0
  35. data/lib/quarry/src/data_set/features/nominal_feature.h +76 -0
  36. data/lib/quarry/src/data_set/features/numeric_feature.cpp +69 -0
  37. data/lib/quarry/src/data_set/features/numeric_feature.h +78 -0
  38. data/lib/quarry/src/data_set/sparse/sparse_data_set.h +40 -0
  39. data/lib/quarry/src/data_set/sparse/sparse_example.cpp +82 -0
  40. data/lib/quarry/src/data_set/sparse/sparse_example.h +38 -0
  41. data/lib/quarry/src/metrics/confusion_matrix.cpp +129 -0
  42. data/lib/quarry/src/metrics/confusion_matrix.h +82 -0
  43. data/lib/quarry/src/model/model.cpp +29 -0
  44. data/lib/quarry/src/model/model.h +50 -0
  45. data/lib/quarry/src/preprocessing/examples/example_preprocessor.h +20 -0
  46. data/lib/quarry/src/preprocessing/examples/weights/binary_weight.h +20 -0
  47. data/lib/quarry/src/preprocessing/examples/weights/local_weight.h +29 -0
  48. data/lib/quarry/src/preprocessing/text/example_generator/example_generator.h +19 -0
  49. data/lib/quarry/src/preprocessing/text/example_generator/token_counter.h +59 -0
  50. data/lib/quarry/src/preprocessing/text/inplace_processor/downcase.h +26 -0
  51. data/lib/quarry/src/preprocessing/text/inplace_processor/inplace_processor.h +17 -0
  52. data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer.h +44 -0
  53. data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp +375 -0
  54. data/lib/quarry/src/preprocessing/text/text_pipeline.cpp +29 -0
  55. data/lib/quarry/src/preprocessing/text/text_pipeline.h +37 -0
  56. data/lib/quarry/src/preprocessing/text/token_selector/pos_tag_selector.h +21 -0
  57. data/lib/quarry/src/preprocessing/text/token_selector/stop_words.cpp +82 -0
  58. data/lib/quarry/src/preprocessing/text/token_selector/stop_words.h +20 -0
  59. data/lib/quarry/src/preprocessing/text/token_selector/token_selector.h +17 -0
  60. data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp +29 -0
  61. data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.h +20 -0
  62. data/lib/quarry/src/preprocessing/text/tokeniser/tokeniser.h +19 -0
  63. data/lib/quarry/src/quarry.cpp +1 -0
  64. data/lib/quarry/src/quarry.h +29 -0
  65. data/lib/quarry/src/storage/arff.cpp +198 -0
  66. data/lib/quarry/src/storage/arff.h +26 -0
  67. data/lib/quarry/src/storage/binary.cpp +457 -0
  68. data/lib/quarry/src/storage/binary.h +79 -0
  69. data/lib/quarry/src/storage/folders.cpp +98 -0
  70. data/lib/quarry/src/storage/folders.h +25 -0
  71. data/lib/quarry/src/storage/storage.h +19 -0
  72. data/lib/quarry/src/test.cpp +6 -0
  73. data/lib/quarry_rb/classifier/classifier.rb +22 -0
  74. data/lib/quarry_rb/classifier/naive_bayes_classifier.rb +10 -0
  75. data/lib/quarry_rb/confusion_matrix.rb +58 -0
  76. data/lib/quarry_rb/data_set/data_set.rb +42 -0
  77. data/lib/quarry_rb/data_set/example.rb +33 -0
  78. data/lib/quarry_rb/data_set/feature.rb +28 -0
  79. data/lib/quarry_rb/enumerable_helper.rb +32 -0
  80. data/lib/quarry_rb/model/model.rb +56 -0
  81. data/lib/quarry_rb/storage/arff.rb +11 -0
  82. data/lib/quarry_rb/storage/binary.rb +23 -0
  83. data/lib/quarry_rb/storage/folders.rb +11 -0
  84. data/lib/quarry_rb/text_pipeline.rb +16 -0
  85. data/lib/thera.rb +20 -0
  86. data/test/helper.rb +19 -0
  87. data/test/test_quarry.rb +33 -0
  88. data/thera.gemspec +21 -0
  89. metadata +148 -0
@@ -0,0 +1,10 @@
module Quarry
  module Classifier
    # Ruby-side wrapper for the ImplNaiveBayesClassifier implementation class.
    class NaiveBayesClassifier < Classifier
      # Creates the underlying implementation object from the object wrapped
      # by +data_set+ (read through its #data_set reader), stores it in
      # @classifier, and then passes +data_set+ on to the parent initializer.
      #
      # The implementation object is assigned before calling super, exactly
      # as the parent may rely on @classifier already being set.
      # NOTE(review): the parent class is not visible here - confirm.
      def initialize(data_set)
        wrapped_data_set = data_set.data_set
        @classifier = Quarry::Classifier::ImplNaiveBayesClassifier.new(wrapped_data_set)
        super(data_set)
      end
    end
  end
end
@@ -0,0 +1,58 @@
module Quarry
  # Ruby-side wrapper around an ImplConfusionMatrix object.
  #
  # Every statistic is delegated to the wrapped object. The per-category
  # statistics accept either a category wrapper (anything responding to
  # #category) or the underlying category object itself.
  class ConfusionMatrix
    attr_reader :confusion_matrix
    attr_reader :data_set

    # data_set - data set wrapper; its #data_set value is used to build a
    #            fresh ImplConfusionMatrix when no matrix is supplied.
    # matrix   - optional existing implementation object to wrap.
    def initialize(data_set, matrix = nil)
      @data_set = data_set
      @confusion_matrix = matrix || Quarry::ImplConfusionMatrix.new(data_set.data_set)
    end

    # Records one prediction. Both arguments must respond to #category.
    def add(predicted, actual)
      @confusion_matrix.add(predicted.category, actual.category)
    end

    def accuracy
      @confusion_matrix.accuracy
    end

    def error
      @confusion_matrix.error
    end

    def tp(category)
      @confusion_matrix.tp(unwrap_category(category))
    end

    def fp(category)
      @confusion_matrix.fp(unwrap_category(category))
    end

    def tn(category)
      @confusion_matrix.tn(unwrap_category(category))
    end

    def fn(category)
      @confusion_matrix.fn(unwrap_category(category))
    end

    # precision, recall and fscore previously forwarded the wrapper object
    # unchanged, unlike tp/fp/tn/fn; they now unwrap it the same way.
    def precision(category)
      @confusion_matrix.precision(unwrap_category(category))
    end

    def recall(category)
      @confusion_matrix.recall(unwrap_category(category))
    end

    def fscore(category)
      @confusion_matrix.fscore(unwrap_category(category))
    end

    def print_summary
      @confusion_matrix.print_summary
    end

    private

    # Returns the object wrapped by +category+ when it exposes #category,
    # otherwise returns +category+ itself so already-unwrapped objects
    # keep working.
    def unwrap_category(category)
      category.respond_to?(:category) ? category.category : category
    end
  end
end
@@ -0,0 +1,42 @@
module Quarry
  module DataSet
    # Ruby-side wrapper around an ImplDataSet object.
    class DataSet
      attr_reader :data_set

      # Wraps +data_set+ when given; otherwise creates a new ImplDataSet.
      def initialize(data_set = nil)
        @data_set = data_set
        @data_set ||= ImplDataSet.new
      end

      def name
        @data_set.get_name
      end

      def name=(new_name)
        @data_set.set_name(new_name)
      end

      # The three collection readers each build their EnumerableHelper on
      # first use and return the same helper afterwards.
      def examples
        @examples ||= collection_of(Example, :examples_size, :get_example_by_index)
      end

      def categories
        @categories ||= collection_of(Category, :categories_size, :get_category_by_index)
      end

      def features
        @features ||= collection_of(Feature, :features_size, :get_feature_by_index)
      end

      # Delegates to the wrapped data set, passing the object wrapped by
      # +classifier+ (its #classifier value).
      def stratify(classifier, folds, skip_fold)
        wrapped_classifier = classifier.classifier
        @data_set.stratify(wrapped_classifier, folds, skip_fold)
      end

      # Returns a ConfusionMatrix wrapping the result of the delegated call.
      def classify_fold(classifier, folds, fold)
        matrix = @data_set.classify_fold(classifier.classifier, folds, fold)
        ConfusionMatrix.new(self, matrix)
      end

      # Returns a ConfusionMatrix wrapping the result of the delegated call.
      def cross_fold_validation(classifier, folds)
        matrix = @data_set.cross_fold_validation(classifier.classifier, folds)
        ConfusionMatrix.new(self, matrix)
      end

      private

      # Builds an EnumerableHelper over the wrapped data set for +klass+.
      def collection_of(klass, size_fn, get_fn)
        EnumerableHelper.new(self, @data_set, klass, size_fn, get_fn)
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Quarry
  module DataSet
    # Wraps a single implementation-level example together with the data
    # set wrapper it was obtained from.
    class Example
      attr_reader :example

      def initialize(example, data_set)
        @example = example
        @data_set = data_set
      end

      # Reads the value stored at +index+ in the wrapped example.
      def [](index)
        @example.get_value(index)
      end

      # Stores +value+ at +index+ in the wrapped example.
      def []=(index, value)
        @example.set_value(index, value)
      end

      def category_index
        @example.get_category_index
      end

      # Looks the category up in the owning data set's categories
      # collection, using the index reported by the wrapped example.
      def category
        @data_set.categories[@example.get_category_index]
      end

      # Assigns a new category. Raises a RuntimeError unless +new_category+
      # is a Category; the object it wraps is what gets stored.
      def category=(new_category)
        unless new_category.is_a?(Category)
          raise "new_category must be an instance of Quarry::DataSet::Category"
        end

        @example.set_category(new_category.category)
      end
    end
  end
end
@@ -0,0 +1,28 @@
module Quarry
  module DataSet
    # Wraps a single implementation-level feature together with the data
    # set wrapper it was obtained from. Name and type are read from and
    # written to the wrapped feature object.
    class Feature
      attr_reader :feature

      def initialize(feature, data_set)
        @feature = feature
        @data_set = data_set
      end

      def name
        @feature.get_name
      end

      def name=(new_name)
        @feature.set_name(new_name)
      end

      def type
        @feature.get_type
      end

      def type=(new_type)
        @feature.set_type(new_type)
      end
    end
  end
end
@@ -0,0 +1,32 @@
module Quarry
  # Presents an index-addressable container as an Enumerable of wrapper
  # objects.
  #
  # data_set  - passed as the second argument to every wrapper created.
  # container - object that is sent +size+ (element count) and +get_fn+
  #             (element lookup by index).
  # klass     - wrapper class, instantiated as klass.new(element, data_set).
  # size      - name of the container method returning the element count.
  # get_fn    - name of the container method returning the element at an index.
  class EnumerableHelper
    include Enumerable

    def initialize(data_set, container, klass, size, get_fn)
      @container = container
      @data_set = data_set
      @size_fn = size
      @get_fn = get_fn
      @klass = klass

      # because of the way the Ruby GC works, it's easier to store
      # references to enumerated objects here than from the C++ side.
      # by keeping a reference to returned objects in this object,
      # iterated objects that shouldn't be released (e.g Examples)
      # won't be until the data set is released.
      @objects = Hash.new do |hash, index|
        hash[index] = @klass.new(@container.send(@get_fn, index), @data_set)
      end
    end

    # Current number of elements. The container is asked on every call
    # rather than once at construction, so elements added after this
    # helper was created (helpers are memoized by their owner) are visible.
    def size
      @container.send(@size_fn)
    end

    # Returns the wrapper for +index+, or nil when +index+ is negative or
    # past the end. The same wrapper object is returned on repeated calls.
    def [](index)
      return nil if (index < 0) || (index >= size)
      @objects[index]
    end

    # Yields each wrapper in index order. Without a block an Enumerator is
    # returned instead of raising LocalJumpError.
    def each
      return enum_for(:each) unless block_given?

      (0...size).each do |index|
        yield @objects[index]
      end
    end
  end
end
@@ -0,0 +1,56 @@
module Quarry
  # Ruby-side wrapper around an ImplModel object.
  class Model
    attr_reader :model

    # Wraps +m+ when given; otherwise creates a new ImplModel.
    def initialize(m = nil)
      @model = m
      @model ||= Quarry::ImplModel.new
    end

    # Returns a new DataSet wrapper around the model's data set on each call.
    def data_set
      wrapped = @model.get_data_set
      Quarry::DataSet::DataSet.new(wrapped)
    end

    # Stores the object wrapped by +ds+ (its #data_set value) on the model.
    def data_set=(ds)
      @model.set_data_set(ds.data_set)
    end

    # The classifier reader is intentionally left disabled:
    # def classifier
    #   @model.get_classifier
    # end

    # Stores the object wrapped by +c+ (its #classifier value) on the model.
    def classifier=(c)
      @model.set_classifier(c.classifier)
    end

    # Returns a new TextPipeline wrapper around the model's pipeline.
    def text_pipeline
      wrapped = @model.get_text_pipeline
      TextPipeline.new(wrapped)
    end

    # Stores the object wrapped by +t+ (its #text_pipeline value) on the model.
    def text_pipeline=(t)
      @model.set_text_pipeline(t.text_pipeline)
    end

    # The remaining methods forward their argument to the wrapped model
    # unchanged and return whatever it returns.
    def train(example)
      @model.train(example)
    end

    def train_text(text)
      @model.train_text(text)
    end

    def classify(example)
      @model.classify(example)
    end

    def classify_text(text)
      @model.classify_text(text)
    end

    def rank(example)
      @model.rank(example)
    end

    def rank_text(text)
      @model.rank_text(text)
    end
  end
end
@@ -0,0 +1,11 @@
module Quarry
  # Ruby-side wrapper around an ImplARFF object created for a file path.
  class ARFF
    # path - passed unchanged to ImplARFF.new.
    def initialize(path)
      @arff = Quarry::ImplARFF.new(path)
    end

    # Calls #read on the wrapped object and returns the result wrapped in
    # a Quarry::DataSet::DataSet.
    def read
      loaded = @arff.read
      Quarry::DataSet::DataSet.new(loaded)
    end
  end
end
@@ -0,0 +1,23 @@
module Quarry
  # Ruby-side wrapper around an ImplBinary object created for a file path.
  # Reads return Ruby wrappers; writes pass the wrapped objects through.
  class Binary
    # path - passed unchanged to ImplBinary.new.
    def initialize(path)
      @binary = Quarry::ImplBinary.new(path)
    end

    # Returns the result of the wrapped #read as a Quarry::DataSet::DataSet.
    def read
      loaded = @binary.read
      Quarry::DataSet::DataSet.new(loaded)
    end

    # Writes the object wrapped by +data_set+ (its #data_set value).
    def write(data_set)
      @binary.write(data_set.data_set)
    end

    # Returns the result of the wrapped #read_model as a Quarry::Model.
    def read_model
      loaded = @binary.read_model
      Quarry::Model.new(loaded)
    end

    # Writes the object wrapped by +model+ (its #model value).
    def write_model(model)
      @binary.write_model(model.model)
    end
  end
end
@@ -0,0 +1,11 @@
module Quarry
  # Ruby-side wrapper around an ImplFolders object.
  class Folders
    # path     - passed unchanged to ImplFolders.new.
    # pipeline - wrapper whose #text_pipeline value is passed alongside it.
    def initialize(path, pipeline)
      wrapped_pipeline = pipeline.text_pipeline
      @folders = Quarry::ImplFolders.new(path, wrapped_pipeline)
    end

    # Calls #read on the wrapped object and returns the result wrapped in
    # a Quarry::DataSet::DataSet.
    def read
      loaded = @folders.read
      Quarry::DataSet::DataSet.new(loaded)
    end
  end
end
@@ -0,0 +1,16 @@
module Quarry
  # Ruby-side wrapper around an ImplTextPipeline object.
  class TextPipeline
    attr_reader :text_pipeline

    # Wraps +tp+ when given; otherwise creates a new ImplTextPipeline.
    def initialize(tp = nil)
      @text_pipeline = tp || Quarry::ImplTextPipeline.new
    end

    # Runs +text+ through the wrapped pipeline and returns the result as a
    # Quarry::DataSet::Example tied to +data_set+.
    #
    # data_set - data set wrapper. The object it wraps (its #data_set
    #            value) is what is handed to the wrapped pipeline, matching
    #            how the other wrappers pass data sets down; an object that
    #            does not respond to #data_set is passed through unchanged.
    # text     - passed unchanged to the wrapped pipeline.
    #
    # The example class is named in full and given both constructor
    # arguments: Quarry::DataSet::Example takes (example, data_set) and is
    # not reachable as plain +Example+ from this namespace.
    def process_text(data_set, text)
      wrapped_data_set = data_set.respond_to?(:data_set) ? data_set.data_set : data_set
      processed = @text_pipeline.process_text(wrapped_data_set, text)
      Quarry::DataSet::Example.new(processed, data_set)
    end

    # Wraps the pipeline returned by Quarry::Preprocessing::Text.standard_pipeline.
    def self.standard_pipeline
      new(Quarry::Preprocessing::Text.standard_pipeline)
    end
  end
end
data/lib/thera.rb ADDED
@@ -0,0 +1,20 @@
require 'quarry_toolkit'

# Load the Ruby wrappers by absolute path, anchored on this file's location.
# This replaces a Dir.chdir block with "./"-relative requires: chdir changes
# the working directory of the whole process while the files load, which is
# visible to other threads and to anything else that runs during loading.
quarry_rb_dir = File.expand_path('../quarry_rb', __FILE__)

# The load order is unchanged from the original list.
%w[
  text_pipeline
  model/model
  storage/arff
  storage/binary
  storage/folders
  enumerable_helper
  confusion_matrix
  data_set/data_set
  data_set/example
  data_set/feature
  classifier/classifier
  classifier/naive_bayes_classifier
].each do |feature|
  require File.join(quarry_rb_dir, feature)
end
data/test/helper.rb ADDED
@@ -0,0 +1,19 @@
# Shared test setup: activates the bundle, loads the test libraries, puts
# the project's ext/, lib/ and test/ directories on the load path, and
# loads the library under test.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the problem and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'test/unit'
require 'shoulda'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext'))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

# The gem's entry point is lib/thera.rb; there is no lib/quarry.rb to load.
require 'thera'

class Test::Unit::TestCase
end
@@ -0,0 +1,33 @@
require 'helper'

# Smoke tests for the Quarry bindings.
#
# The ARFF fixtures are not part of the gem. Their locations default to the
# original author's paths and can be overridden with the
# QUARRY_CATEGORIES_ARFF and QUARRY_REDUCED_ARFF environment variables.
class TestQuarry < Test::Unit::TestCase
  CATEGORIES_ARFF = ENV.fetch('QUARRY_CATEGORIES_ARFF', "/Users/will/dev/classifier/src/categories.arff")
  REDUCED_ARFF = ENV.fetch('QUARRY_REDUCED_ARFF', "/Users/will/Desktop/test_reduced.arff")

  should 'load' do
    # The gem's entry point is lib/thera.rb; it defines the Quarry namespace.
    require 'thera'
    assert defined?(Quarry)
  end

  should 'expose ARFF' do
    assert defined?(Quarry::ARFF)
  end

  should 'allow creating ARFF objects' do
    assert Quarry::ARFF.new(CATEGORIES_ARFF)
  end

  should 'read ARFF objects' do
    ds = Quarry::ARFF.new(REDUCED_ARFF).read

    puts "\nFeatures: #{ds.features.size}, examples: #{ds.examples.size}"
    classifier = Quarry::Classifier::NaiveBayesClassifier.new(ds)
    print "Created classifier\n"
    cm = ds.cross_fold_validation(classifier, 2)
    print "Cross fold done\n"
    assert cm
    cm.print_summary

    #ds.stratify(classifier, 10, 0)
    #classifier.classify(ds.examples[0])
    #cm = ds.classify_fold(classifier, 10, 0)
    #cm.print_summary
  end
end
data/thera.gemspec ADDED
@@ -0,0 +1,21 @@
# -*- encoding: utf-8 -*-
# Gem specification for thera. File lists are taken from `git ls-files`,
# so the gem must be built from inside the git checkout.
lib_dir = File.expand_path('../lib', __FILE__)
$:.push lib_dir

Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "thera"
  spec.version     = '0.0.1'
  spec.summary     = "Ruby Data Mining Library"
  spec.description = "C++ Data Mining Library for Ruby"
  spec.homepage    = "http://github.com/willcannings/quarry"
  spec.license     = "Public Domain"
  spec.authors     = ["Will Cannings"]
  spec.email       = "me@willcannings.com"

  # Extension build script.
  spec.extensions = ["ext/extconf.rb"]

  # Packaged files, one path per line of git output.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ['lib', 'ext']

  spec.add_runtime_dependency 'rice'
end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: thera
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Will Cannings
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-07 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rice
16
+ requirement: &70234812248380 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70234812248380
25
+ description: C++ Data Mining Library for Ruby
26
+ email: me@willcannings.com
27
+ executables: []
28
+ extensions:
29
+ - ext/extconf.rb
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .document
33
+ - .gitignore
34
+ - Gemfile
35
+ - Gemfile.lock
36
+ - LICENSE.txt
37
+ - README.rdoc
38
+ - Rakefile
39
+ - ext/Makefile
40
+ - ext/extconf.rb
41
+ - ext/quarry/quarry_toolkit.cpp
42
+ - lib/quarry/Makefile.linux
43
+ - lib/quarry/Makefile.osx
44
+ - lib/quarry/Makefile.targets
45
+ - lib/quarry/obj/.gitkeep
46
+ - lib/quarry/src/classifier/aode/aode_classifier.cpp
47
+ - lib/quarry/src/classifier/aode/aode_classifier.h
48
+ - lib/quarry/src/classifier/centroid/centroid_classifier.cpp
49
+ - lib/quarry/src/classifier/centroid/centroid_classifier.h
50
+ - lib/quarry/src/classifier/classifier.cpp
51
+ - lib/quarry/src/classifier/classifier.h
52
+ - lib/quarry/src/classifier/knn/knn_classifier.cpp
53
+ - lib/quarry/src/classifier/knn/knn_classifier.h
54
+ - lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.cpp
55
+ - lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.h
56
+ - lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.cpp
57
+ - lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.h
58
+ - lib/quarry/src/data_set/data_set.cpp
59
+ - lib/quarry/src/data_set/data_set.h
60
+ - lib/quarry/src/data_set/dense/dense_data_set.h
61
+ - lib/quarry/src/data_set/dense/dense_example.h
62
+ - lib/quarry/src/data_set/example.cpp
63
+ - lib/quarry/src/data_set/example.h
64
+ - lib/quarry/src/data_set/feature.h
65
+ - lib/quarry/src/data_set/features/nominal_feature.cpp
66
+ - lib/quarry/src/data_set/features/nominal_feature.h
67
+ - lib/quarry/src/data_set/features/numeric_feature.cpp
68
+ - lib/quarry/src/data_set/features/numeric_feature.h
69
+ - lib/quarry/src/data_set/sparse/sparse_data_set.h
70
+ - lib/quarry/src/data_set/sparse/sparse_example.cpp
71
+ - lib/quarry/src/data_set/sparse/sparse_example.h
72
+ - lib/quarry/src/metrics/confusion_matrix.cpp
73
+ - lib/quarry/src/metrics/confusion_matrix.h
74
+ - lib/quarry/src/model/model.cpp
75
+ - lib/quarry/src/model/model.h
76
+ - lib/quarry/src/preprocessing/examples/example_preprocessor.h
77
+ - lib/quarry/src/preprocessing/examples/weights/binary_weight.h
78
+ - lib/quarry/src/preprocessing/examples/weights/local_weight.h
79
+ - lib/quarry/src/preprocessing/text/example_generator/example_generator.h
80
+ - lib/quarry/src/preprocessing/text/example_generator/token_counter.h
81
+ - lib/quarry/src/preprocessing/text/inplace_processor/downcase.h
82
+ - lib/quarry/src/preprocessing/text/inplace_processor/inplace_processor.h
83
+ - lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer.h
84
+ - lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp
85
+ - lib/quarry/src/preprocessing/text/text_pipeline.cpp
86
+ - lib/quarry/src/preprocessing/text/text_pipeline.h
87
+ - lib/quarry/src/preprocessing/text/token_selector/pos_tag_selector.h
88
+ - lib/quarry/src/preprocessing/text/token_selector/stop_words.cpp
89
+ - lib/quarry/src/preprocessing/text/token_selector/stop_words.h
90
+ - lib/quarry/src/preprocessing/text/token_selector/token_selector.h
91
+ - lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
92
+ - lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.h
93
+ - lib/quarry/src/preprocessing/text/tokeniser/tokeniser.h
94
+ - lib/quarry/src/quarry.cpp
95
+ - lib/quarry/src/quarry.h
96
+ - lib/quarry/src/storage/arff.cpp
97
+ - lib/quarry/src/storage/arff.h
98
+ - lib/quarry/src/storage/binary.cpp
99
+ - lib/quarry/src/storage/binary.h
100
+ - lib/quarry/src/storage/folders.cpp
101
+ - lib/quarry/src/storage/folders.h
102
+ - lib/quarry/src/storage/storage.h
103
+ - lib/quarry/src/test.cpp
104
+ - lib/quarry_rb/classifier/classifier.rb
105
+ - lib/quarry_rb/classifier/naive_bayes_classifier.rb
106
+ - lib/quarry_rb/confusion_matrix.rb
107
+ - lib/quarry_rb/data_set/data_set.rb
108
+ - lib/quarry_rb/data_set/example.rb
109
+ - lib/quarry_rb/data_set/feature.rb
110
+ - lib/quarry_rb/enumerable_helper.rb
111
+ - lib/quarry_rb/model/model.rb
112
+ - lib/quarry_rb/storage/arff.rb
113
+ - lib/quarry_rb/storage/binary.rb
114
+ - lib/quarry_rb/storage/folders.rb
115
+ - lib/quarry_rb/text_pipeline.rb
116
+ - lib/thera.rb
117
+ - test/helper.rb
118
+ - test/test_quarry.rb
119
+ - thera.gemspec
120
+ homepage: http://github.com/willcannings/quarry
121
+ licenses:
122
+ - Public Domain
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ - ext
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ requirements: []
141
+ rubyforge_project:
142
+ rubygems_version: 1.8.10
143
+ signing_key:
144
+ specification_version: 3
145
+ summary: Ruby Data Mining Library
146
+ test_files:
147
+ - test/helper.rb
148
+ - test/test_quarry.rb