thera 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. data/.document +5 -0
  2. data/.gitignore +56 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +20 -0
  5. data/LICENSE.txt +1 -0
  6. data/README.rdoc +8 -0
  7. data/Rakefile +1 -0
  8. data/ext/Makefile +225 -0
  9. data/ext/extconf.rb +29 -0
  10. data/ext/quarry/quarry_toolkit.cpp +148 -0
  11. data/lib/quarry/Makefile.linux +2 -0
  12. data/lib/quarry/Makefile.osx +6 -0
  13. data/lib/quarry/Makefile.targets +23 -0
  14. data/lib/quarry/obj/.gitkeep +0 -0
  15. data/lib/quarry/src/classifier/aode/aode_classifier.cpp +0 -0
  16. data/lib/quarry/src/classifier/aode/aode_classifier.h +0 -0
  17. data/lib/quarry/src/classifier/centroid/centroid_classifier.cpp +0 -0
  18. data/lib/quarry/src/classifier/centroid/centroid_classifier.h +0 -0
  19. data/lib/quarry/src/classifier/classifier.cpp +32 -0
  20. data/lib/quarry/src/classifier/classifier.h +59 -0
  21. data/lib/quarry/src/classifier/knn/knn_classifier.cpp +0 -0
  22. data/lib/quarry/src/classifier/knn/knn_classifier.h +0 -0
  23. data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.cpp +40 -0
  24. data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.h +18 -0
  25. data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.cpp +80 -0
  26. data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.h +52 -0
  27. data/lib/quarry/src/data_set/data_set.cpp +130 -0
  28. data/lib/quarry/src/data_set/data_set.h +78 -0
  29. data/lib/quarry/src/data_set/dense/dense_data_set.h +39 -0
  30. data/lib/quarry/src/data_set/dense/dense_example.h +44 -0
  31. data/lib/quarry/src/data_set/example.cpp +10 -0
  32. data/lib/quarry/src/data_set/example.h +23 -0
  33. data/lib/quarry/src/data_set/feature.h +36 -0
  34. data/lib/quarry/src/data_set/features/nominal_feature.cpp +57 -0
  35. data/lib/quarry/src/data_set/features/nominal_feature.h +76 -0
  36. data/lib/quarry/src/data_set/features/numeric_feature.cpp +69 -0
  37. data/lib/quarry/src/data_set/features/numeric_feature.h +78 -0
  38. data/lib/quarry/src/data_set/sparse/sparse_data_set.h +40 -0
  39. data/lib/quarry/src/data_set/sparse/sparse_example.cpp +82 -0
  40. data/lib/quarry/src/data_set/sparse/sparse_example.h +38 -0
  41. data/lib/quarry/src/metrics/confusion_matrix.cpp +129 -0
  42. data/lib/quarry/src/metrics/confusion_matrix.h +82 -0
  43. data/lib/quarry/src/model/model.cpp +29 -0
  44. data/lib/quarry/src/model/model.h +50 -0
  45. data/lib/quarry/src/preprocessing/examples/example_preprocessor.h +20 -0
  46. data/lib/quarry/src/preprocessing/examples/weights/binary_weight.h +20 -0
  47. data/lib/quarry/src/preprocessing/examples/weights/local_weight.h +29 -0
  48. data/lib/quarry/src/preprocessing/text/example_generator/example_generator.h +19 -0
  49. data/lib/quarry/src/preprocessing/text/example_generator/token_counter.h +59 -0
  50. data/lib/quarry/src/preprocessing/text/inplace_processor/downcase.h +26 -0
  51. data/lib/quarry/src/preprocessing/text/inplace_processor/inplace_processor.h +17 -0
  52. data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer.h +44 -0
  53. data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp +375 -0
  54. data/lib/quarry/src/preprocessing/text/text_pipeline.cpp +29 -0
  55. data/lib/quarry/src/preprocessing/text/text_pipeline.h +37 -0
  56. data/lib/quarry/src/preprocessing/text/token_selector/pos_tag_selector.h +21 -0
  57. data/lib/quarry/src/preprocessing/text/token_selector/stop_words.cpp +82 -0
  58. data/lib/quarry/src/preprocessing/text/token_selector/stop_words.h +20 -0
  59. data/lib/quarry/src/preprocessing/text/token_selector/token_selector.h +17 -0
  60. data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp +29 -0
  61. data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.h +20 -0
  62. data/lib/quarry/src/preprocessing/text/tokeniser/tokeniser.h +19 -0
  63. data/lib/quarry/src/quarry.cpp +1 -0
  64. data/lib/quarry/src/quarry.h +29 -0
  65. data/lib/quarry/src/storage/arff.cpp +198 -0
  66. data/lib/quarry/src/storage/arff.h +26 -0
  67. data/lib/quarry/src/storage/binary.cpp +457 -0
  68. data/lib/quarry/src/storage/binary.h +79 -0
  69. data/lib/quarry/src/storage/folders.cpp +98 -0
  70. data/lib/quarry/src/storage/folders.h +25 -0
  71. data/lib/quarry/src/storage/storage.h +19 -0
  72. data/lib/quarry/src/test.cpp +6 -0
  73. data/lib/quarry_rb/classifier/classifier.rb +22 -0
  74. data/lib/quarry_rb/classifier/naive_bayes_classifier.rb +10 -0
  75. data/lib/quarry_rb/confusion_matrix.rb +58 -0
  76. data/lib/quarry_rb/data_set/data_set.rb +42 -0
  77. data/lib/quarry_rb/data_set/example.rb +33 -0
  78. data/lib/quarry_rb/data_set/feature.rb +28 -0
  79. data/lib/quarry_rb/enumerable_helper.rb +32 -0
  80. data/lib/quarry_rb/model/model.rb +56 -0
  81. data/lib/quarry_rb/storage/arff.rb +11 -0
  82. data/lib/quarry_rb/storage/binary.rb +23 -0
  83. data/lib/quarry_rb/storage/folders.rb +11 -0
  84. data/lib/quarry_rb/text_pipeline.rb +16 -0
  85. data/lib/thera.rb +20 -0
  86. data/test/helper.rb +19 -0
  87. data/test/test_quarry.rb +33 -0
  88. data/thera.gemspec +21 -0
  89. metadata +148 -0
@@ -0,0 +1,10 @@
1
module Quarry
  module Classifier
    # Ruby-side wrapper for the naive Bayes classifier implementation
    # (Quarry::Classifier::ImplNaiveBayesClassifier, presumably supplied by
    # the compiled quarry_toolkit extension -- confirm against the ext source).
    class NaiveBayesClassifier < Classifier
      # Builds the implementation object from the handle exposed by
      # +data_set.data_set+ and stores it in @classifier, then passes the
      # Ruby data set wrapper on to the superclass initializer.
      #
      # data_set - an object responding to #data_set (e.g. Quarry::DataSet::DataSet)
      def initialize(data_set)
        implementation = Quarry::Classifier::ImplNaiveBayesClassifier
        @classifier = implementation.new(data_set.data_set)
        super(data_set)
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Quarry
  # Ruby-side wrapper around a confusion matrix implementation object
  # (Quarry::ImplConfusionMatrix unless one is supplied by the caller).
  #
  # Every per-category query accepts either a Ruby category wrapper (any
  # object responding to #category) or the underlying category handle itself;
  # wrappers are unwrapped before being handed to the implementation.
  class ConfusionMatrix
    attr_reader :confusion_matrix
    attr_reader :data_set

    # data_set - the Ruby data set wrapper this matrix belongs to
    # matrix   - an existing implementation object to wrap; when omitted a new
    #            Quarry::ImplConfusionMatrix is built from +data_set.data_set+
    def initialize(data_set, matrix = nil)
      @data_set = data_set
      @confusion_matrix = matrix || Quarry::ImplConfusionMatrix.new(data_set.data_set)
    end

    # Records one prediction. Both arguments must respond to #category; the
    # values returned by those calls are forwarded to the implementation.
    def add(predicted, actual)
      @confusion_matrix.add(predicted.category, actual.category)
    end

    # Overall accuracy as reported by the implementation.
    def accuracy
      @confusion_matrix.accuracy
    end

    # Overall error as reported by the implementation.
    def error
      @confusion_matrix.error
    end

    # True positives for +category+.
    def tp(category)
      @confusion_matrix.tp(unwrap(category))
    end

    # False positives for +category+.
    def fp(category)
      @confusion_matrix.fp(unwrap(category))
    end

    # True negatives for +category+.
    def tn(category)
      @confusion_matrix.tn(unwrap(category))
    end

    # False negatives for +category+.
    def fn(category)
      @confusion_matrix.fn(unwrap(category))
    end

    # Precision for +category+.
    def precision(category)
      @confusion_matrix.precision(unwrap(category))
    end

    # Recall for +category+.
    def recall(category)
      @confusion_matrix.recall(unwrap(category))
    end

    # F-score for +category+.
    def fscore(category)
      @confusion_matrix.fscore(unwrap(category))
    end

    # Asks the implementation to print its summary.
    def print_summary
      @confusion_matrix.print_summary
    end

    private

    # Returns the underlying category handle. tp/fp/tn/fn previously unwrapped
    # with +category.category+ while precision/recall/fscore forwarded the
    # wrapper untouched; routing all of them through here keeps them
    # consistent, and objects without #category are passed through unchanged
    # so callers already supplying a raw handle keep working.
    def unwrap(category)
      category.respond_to?(:category) ? category.category : category
    end
  end
end
@@ -0,0 +1,42 @@
1
module Quarry
  module DataSet
    # Ruby-side wrapper around a data set implementation object (ImplDataSet).
    # The wrapped object is exposed through #data_set so other wrappers can
    # hand it to the implementation layer.
    class DataSet
      attr_reader :data_set

      # data_set - an existing implementation object to wrap; a fresh
      #            ImplDataSet is created when none is given.
      def initialize(data_set = nil)
        @data_set = data_set || ImplDataSet.new
      end

      # Name of the data set, read from the implementation.
      def name
        @data_set.get_name
      end

      # Renames the data set in the implementation.
      def name=(new_name)
        @data_set.set_name(new_name)
      end

      # Enumerable view over the examples, each wrapped in Example.
      # The view is created once and reused on later calls.
      def examples
        @examples ||= collection_of(Example, :examples_size, :get_example_by_index)
      end

      # Enumerable view over the categories, each wrapped in Category.
      # The view is created once and reused on later calls.
      def categories
        @categories ||= collection_of(Category, :categories_size, :get_category_by_index)
      end

      # Enumerable view over the features, each wrapped in Feature.
      # The view is created once and reused on later calls.
      def features
        @features ||= collection_of(Feature, :features_size, :get_feature_by_index)
      end

      # Forwards to the implementation's stratify, passing the classifier's
      # underlying object (+classifier.classifier+).
      def stratify(classifier, folds, skip_fold)
        @data_set.stratify(classifier.classifier, folds, skip_fold)
      end

      # Classifies a single fold and wraps the result in a ConfusionMatrix.
      def classify_fold(classifier, folds, fold)
        matrix_for(@data_set.classify_fold(classifier.classifier, folds, fold))
      end

      # Runs cross fold validation and wraps the result in a ConfusionMatrix.
      def cross_fold_validation(classifier, folds)
        matrix_for(@data_set.cross_fold_validation(classifier.classifier, folds))
      end

      private

      # Builds an EnumerableHelper over the wrapped data set that instantiates
      # +klass+ for each item fetched through +get_fn+.
      def collection_of(klass, size_fn, get_fn)
        EnumerableHelper.new(self, @data_set, klass, size_fn, get_fn)
      end

      # Wraps an implementation-level matrix in a ConfusionMatrix tied to this data set.
      def matrix_for(impl_matrix)
        ConfusionMatrix.new(self, impl_matrix)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Quarry
  module DataSet
    # Ruby-side wrapper around a single example object from the
    # implementation layer, together with the data set wrapper it came from.
    class Example
      attr_reader :example

      # example  - the underlying example object
      # data_set - the Ruby data set wrapper; used to look up categories
      def initialize(example, data_set)
        @example = example
        @data_set = data_set
      end

      # Reads the value stored at +index+ in the underlying example.
      def [](index)
        @example.get_value(index)
      end

      # Stores +value+ at +index+ in the underlying example.
      def []=(index, value)
        @example.set_value(index, value)
      end

      # Index of this example's category, as reported by the underlying example.
      def category_index
        @example.get_category_index
      end

      # The entry of +data_set.categories+ at this example's category index.
      def category
        @data_set.categories[@example.get_category_index]
      end

      # Assigns a new category. Only Category instances are accepted; anything
      # else raises a RuntimeError.
      def category=(new_category)
        unless new_category.is_a?(Category)
          raise "new_category must be an instance of Quarry::DataSet::Category"
        end

        @example.set_category(new_category.category)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Quarry
  module DataSet
    # Ruby-side wrapper around a single feature object from the
    # implementation layer, together with the data set wrapper it came from.
    class Feature
      attr_reader :feature

      # feature  - the underlying feature object
      # data_set - the Ruby data set wrapper that owns this feature
      def initialize(feature, data_set)
        @feature = feature
        @data_set = data_set
      end

      # Feature name, read from the underlying feature.
      def name
        @feature.get_name
      end

      # Renames the underlying feature.
      def name=(new_name)
        @feature.set_name(new_name)
      end

      # Feature type, read from the underlying feature.
      def type
        @feature.get_type
      end

      # Changes the type of the underlying feature.
      def type=(new_type)
        @feature.set_type(new_type)
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Quarry
  # Enumerable, index-addressable view over the items of a container.
  #
  # Items are fetched from the container by index (via +get_fn+), wrapped in
  # +klass+ and cached, so asking for the same index twice returns the same
  # Ruby object.
  class EnumerableHelper
    include Enumerable

    # data_set  - passed as the second argument to +klass.new+ for every item
    # container - object that answers the +size+ and +get_fn+ messages
    # klass     - wrapper class, instantiated as klass.new(item, data_set)
    # size      - name of the container method returning the item count
    # get_fn    - name of the container method returning the item at an index
    def initialize(data_set, container, klass, size, get_fn)
      @container = container
      @data_set = data_set
      @size_fn = size
      @get_fn = get_fn
      @klass = klass

      # because of the way the Ruby GC works, it's easier to store
      # references to enumerated objects here than from the C++ side.
      # by keeping a reference to returned objects in this object,
      # iterated objects that shouldn't be released (e.g Examples)
      # won't be until the data set is released.
      @objects = Hash.new do |cache, index|
        cache[index] = @klass.new(@container.send(@get_fn, index), @data_set)
      end
    end

    # Current number of items in the container. The container is asked on
    # every call rather than once at construction, so a helper that is kept
    # around (DataSet memoizes its helpers) does not report a stale size
    # after the container grows.
    def size
      @container.send(@size_fn)
    end

    # Returns the wrapped item at +index+, or nil when +index+ is negative or
    # not below the current size.
    def [](index)
      return nil if (index < 0) || (index >= size)
      @objects[index]
    end

    # Yields each wrapped item in index order. Without a block an Enumerator
    # is returned instead of raising LocalJumpError.
    def each
      return enum_for(:each) unless block_given?

      (0...size).each do |index|
        yield @objects[index]
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module Quarry
  # Ruby-side wrapper around a model implementation object (Quarry::ImplModel).
  # Apart from wrapping/unwrapping data sets, classifiers and text pipelines,
  # every call is forwarded unchanged to the wrapped object.
  class Model
    attr_reader :model

    # m - an existing implementation object to wrap; a new Quarry::ImplModel
    #     is created when none is given.
    def initialize(m = nil)
      @model = m || Quarry::ImplModel.new
    end

    # The model's data set, wrapped in a new Quarry::DataSet::DataSet on each call.
    def data_set
      Quarry::DataSet::DataSet.new(@model.get_data_set)
    end

    # Assigns the data set, unwrapping +ds+ via +ds.data_set+.
    def data_set=(ds)
      @model.set_data_set(ds.data_set)
    end

    # def classifier
    #   @model.get_classifier
    # end

    # Assigns the classifier, unwrapping +c+ via +c.classifier+.
    def classifier=(c)
      @model.set_classifier(c.classifier)
    end

    # The model's text pipeline, wrapped in a new TextPipeline on each call.
    def text_pipeline
      TextPipeline.new(@model.get_text_pipeline)
    end

    # Assigns the text pipeline, unwrapping +t+ via +t.text_pipeline+.
    def text_pipeline=(t)
      @model.set_text_pipeline(t.text_pipeline)
    end

    # Forwards +example+ as-is to the wrapped model's train.
    def train(example)
      @model.train(example)
    end

    # Forwards +text+ as-is to the wrapped model's train_text.
    def train_text(text)
      @model.train_text(text)
    end

    # Forwards +example+ as-is to the wrapped model's classify and returns its result.
    def classify(example)
      @model.classify(example)
    end

    # Forwards +text+ as-is to the wrapped model's classify_text and returns its result.
    def classify_text(text)
      @model.classify_text(text)
    end

    # Forwards +example+ as-is to the wrapped model's rank and returns its result.
    def rank(example)
      @model.rank(example)
    end

    # Forwards +text+ as-is to the wrapped model's rank_text and returns its result.
    def rank_text(text)
      @model.rank_text(text)
    end
  end
end
@@ -0,0 +1,11 @@
1
module Quarry
  # Ruby-side wrapper around the ARFF storage implementation (Quarry::ImplARFF).
  class ARFF
    # path - handed unchanged to Quarry::ImplARFF.new
    def initialize(path)
      @arff = Quarry::ImplARFF.new(path)
    end

    # Reads through the implementation and returns the result wrapped in a
    # Quarry::DataSet::DataSet.
    def read
      wrapper = Quarry::DataSet::DataSet
      wrapper.new(@arff.read)
    end
  end
end
@@ -0,0 +1,23 @@
1
module Quarry
  # Ruby-side wrapper around the binary storage implementation
  # (Quarry::ImplBinary). Handles both data sets and models.
  class Binary
    # path - handed unchanged to Quarry::ImplBinary.new
    def initialize(path)
      @binary = Quarry::ImplBinary.new(path)
    end

    # Reads a data set through the implementation and wraps it in a
    # Quarry::DataSet::DataSet.
    def read
      wrapper = Quarry::DataSet::DataSet
      wrapper.new(@binary.read)
    end

    # Writes +data_set+, unwrapping it via +data_set.data_set+.
    def write(data_set)
      @binary.write(data_set.data_set)
    end

    # Reads a model through the implementation and wraps it in a Quarry::Model.
    def read_model
      wrapper = Quarry::Model
      wrapper.new(@binary.read_model)
    end

    # Writes +model+, unwrapping it via +model.model+.
    def write_model(model)
      @binary.write_model(model.model)
    end
  end
end
@@ -0,0 +1,11 @@
1
module Quarry
  # Ruby-side wrapper around the folder storage implementation
  # (Quarry::ImplFolders).
  class Folders
    # path     - handed unchanged to Quarry::ImplFolders.new
    # pipeline - a text pipeline wrapper; unwrapped via +pipeline.text_pipeline+
    def initialize(path, pipeline)
      implementation = Quarry::ImplFolders
      @folders = implementation.new(path, pipeline.text_pipeline)
    end

    # Reads through the implementation and returns the result wrapped in a
    # Quarry::DataSet::DataSet.
    def read
      wrapper = Quarry::DataSet::DataSet
      wrapper.new(@folders.read)
    end
  end
end
@@ -0,0 +1,16 @@
1
module Quarry
  # Ruby-side wrapper around a text pipeline implementation object
  # (Quarry::ImplTextPipeline unless one is supplied by the caller).
  class TextPipeline
    attr_reader :text_pipeline

    # tp - an existing implementation object to wrap; a new
    #      Quarry::ImplTextPipeline is created when none is given.
    def initialize(tp = nil)
      @text_pipeline = tp || Quarry::ImplTextPipeline.new
    end

    # Runs +text+ through the pipeline and returns the resulting example
    # wrapped in a Quarry::DataSet::Example.
    #
    # data_set - normally a Quarry::DataSet::DataSet. Its underlying object
    #            (+data_set.data_set+) is what gets handed to the
    #            implementation, matching how the other wrappers unwrap their
    #            arguments; an object without #data_set is forwarded as-is.
    # text     - forwarded unchanged to the implementation
    #
    # The wrapper class is named in full because a bare +Example+ does not
    # resolve to Quarry::DataSet::Example from inside Quarry::TextPipeline,
    # and its initializer takes the data set as a second argument.
    # NOTE(review): when a raw (unwrapped) data set is passed, the returned
    # Example holds that raw object as its data set -- confirm whether such
    # callers exist.
    def process_text(data_set, text)
      underlying = data_set.respond_to?(:data_set) ? data_set.data_set : data_set
      processed = @text_pipeline.process_text(underlying, text)
      Quarry::DataSet::Example.new(processed, data_set)
    end

    # Wraps the pipeline returned by Quarry::Preprocessing::Text.standard_pipeline.
    def self.standard_pipeline
      new(Quarry::Preprocessing::Text.standard_pipeline)
    end
  end
end
data/lib/thera.rb ADDED
@@ -0,0 +1,20 @@
1
# Entry point of the gem: loads the compiled extension first, then the Ruby
# wrapper classes that sit on top of it.
require 'quarry_toolkit'

# Each wrapper is required by absolute path, resolved relative to this file.
# This replaces wrapping the requires in Dir.chdir, which changes the working
# directory of the whole process while the files load.
#
# The list order is the original load order; classifier/classifier stays
# ahead of classifier/naive_bayes_classifier because NaiveBayesClassifier
# names Classifier as its superclass at load time.
%w[
  text_pipeline
  model/model
  storage/arff
  storage/binary
  storage/folders
  enumerable_helper
  confusion_matrix
  data_set/data_set
  data_set/example
  data_set/feature
  classifier/classifier
  classifier/naive_bayes_classifier
].each do |wrapper|
  require File.expand_path("../quarry_rb/#{wrapper}", __FILE__)
end
data/test/helper.rb ADDED
@@ -0,0 +1,19 @@
1
# Shared test bootstrap: activates the bundle, loads the test libraries and
# puts ext/, lib/ and test/ on the load path before loading the library.
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # A missing gem is reported with a hint and Bundler's own exit status.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'
require 'shoulda'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext'))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
# The package's Ruby entry point is lib/thera.rb; it ships no lib/quarry.rb,
# so the library has to be loaded under the name 'thera'.
require 'thera'

class Test::Unit::TestCase
end
@@ -0,0 +1,33 @@
1
require 'helper'

# Smoke tests for the Ruby wrappers: loading the library, constructing an ARFF
# reader and running a cross fold validation over a data set read from disk.
class TestQuarry < Test::Unit::TestCase
  # Fixture locations. They default to the paths originally hard-coded here
  # (which only exist on the author's machine) and can be pointed elsewhere
  # through the environment.
  CATEGORIES_ARFF = ENV.fetch('QUARRY_CATEGORIES_ARFF', "/Users/will/dev/classifier/src/categories.arff")
  REDUCED_ARFF    = ENV.fetch('QUARRY_REDUCED_ARFF', "/Users/will/Desktop/test_reduced.arff")

  should 'load' do
    # lib/thera.rb is the entry point shipped by the package
    require 'thera'
    Quarry
  end

  should 'expose ARFF' do
    Quarry::ARFF
  end

  should 'allow creating ARFF objects' do
    Quarry::ARFF.new(CATEGORIES_ARFF)
  end

  should 'read ARFF objects' do
    ds = Quarry::ARFF.new(REDUCED_ARFF).read

    puts "\nFeatures: #{ds.features.size}, examples: #{ds.examples.size}"
    classifier = Quarry::Classifier::NaiveBayesClassifier.new(ds)
    print "Created classifier\n"
    cm = ds.cross_fold_validation(classifier, 2)
    print "Cross fold done\n"
    cm.print_summary

    #ds.stratify(classifier, 10, 0)
    #classifier.classify(ds.examples[0])
    #cm = ds.classify_fold(classifier, 10, 0)
    #cm.print_summary
  end
end
data/thera.gemspec ADDED
@@ -0,0 +1,21 @@
1
# -*- encoding: utf-8 -*-
$:.push File.expand_path('../lib', __FILE__)

# Gem specification for thera. File lists are taken from `git ls-files`, so
# the spec has to be evaluated inside the git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "thera"
  spec.version     = '0.0.1'
  spec.summary     = "Ruby Data Mining Library"
  spec.description = "C++ Data Mining Library for Ruby"
  spec.license     = "Public Domain"

  # Author and project links
  spec.authors  = ["Will Cannings"]
  spec.email    = "me@willcannings.com"
  spec.homepage = "http://github.com/willcannings/quarry"

  # Packaged files, all derived from what git tracks
  spec.files       = `git ls-files`.split("\n")
  spec.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }

  # Extension build script and load paths
  spec.extensions    = ["ext/extconf.rb"]
  spec.require_paths = ['lib', 'ext']

  spec.add_runtime_dependency 'rice'
end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: thera
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Will Cannings
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-07 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rice
16
+ requirement: &70234812248380 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70234812248380
25
+ description: C++ Data Mining Library for Ruby
26
+ email: me@willcannings.com
27
+ executables: []
28
+ extensions:
29
+ - ext/extconf.rb
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .document
33
+ - .gitignore
34
+ - Gemfile
35
+ - Gemfile.lock
36
+ - LICENSE.txt
37
+ - README.rdoc
38
+ - Rakefile
39
+ - ext/Makefile
40
+ - ext/extconf.rb
41
+ - ext/quarry/quarry_toolkit.cpp
42
+ - lib/quarry/Makefile.linux
43
+ - lib/quarry/Makefile.osx
44
+ - lib/quarry/Makefile.targets
45
+ - lib/quarry/obj/.gitkeep
46
+ - lib/quarry/src/classifier/aode/aode_classifier.cpp
47
+ - lib/quarry/src/classifier/aode/aode_classifier.h
48
+ - lib/quarry/src/classifier/centroid/centroid_classifier.cpp
49
+ - lib/quarry/src/classifier/centroid/centroid_classifier.h
50
+ - lib/quarry/src/classifier/classifier.cpp
51
+ - lib/quarry/src/classifier/classifier.h
52
+ - lib/quarry/src/classifier/knn/knn_classifier.cpp
53
+ - lib/quarry/src/classifier/knn/knn_classifier.h
54
+ - lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.cpp
55
+ - lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.h
56
+ - lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.cpp
57
+ - lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.h
58
+ - lib/quarry/src/data_set/data_set.cpp
59
+ - lib/quarry/src/data_set/data_set.h
60
+ - lib/quarry/src/data_set/dense/dense_data_set.h
61
+ - lib/quarry/src/data_set/dense/dense_example.h
62
+ - lib/quarry/src/data_set/example.cpp
63
+ - lib/quarry/src/data_set/example.h
64
+ - lib/quarry/src/data_set/feature.h
65
+ - lib/quarry/src/data_set/features/nominal_feature.cpp
66
+ - lib/quarry/src/data_set/features/nominal_feature.h
67
+ - lib/quarry/src/data_set/features/numeric_feature.cpp
68
+ - lib/quarry/src/data_set/features/numeric_feature.h
69
+ - lib/quarry/src/data_set/sparse/sparse_data_set.h
70
+ - lib/quarry/src/data_set/sparse/sparse_example.cpp
71
+ - lib/quarry/src/data_set/sparse/sparse_example.h
72
+ - lib/quarry/src/metrics/confusion_matrix.cpp
73
+ - lib/quarry/src/metrics/confusion_matrix.h
74
+ - lib/quarry/src/model/model.cpp
75
+ - lib/quarry/src/model/model.h
76
+ - lib/quarry/src/preprocessing/examples/example_preprocessor.h
77
+ - lib/quarry/src/preprocessing/examples/weights/binary_weight.h
78
+ - lib/quarry/src/preprocessing/examples/weights/local_weight.h
79
+ - lib/quarry/src/preprocessing/text/example_generator/example_generator.h
80
+ - lib/quarry/src/preprocessing/text/example_generator/token_counter.h
81
+ - lib/quarry/src/preprocessing/text/inplace_processor/downcase.h
82
+ - lib/quarry/src/preprocessing/text/inplace_processor/inplace_processor.h
83
+ - lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer.h
84
+ - lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp
85
+ - lib/quarry/src/preprocessing/text/text_pipeline.cpp
86
+ - lib/quarry/src/preprocessing/text/text_pipeline.h
87
+ - lib/quarry/src/preprocessing/text/token_selector/pos_tag_selector.h
88
+ - lib/quarry/src/preprocessing/text/token_selector/stop_words.cpp
89
+ - lib/quarry/src/preprocessing/text/token_selector/stop_words.h
90
+ - lib/quarry/src/preprocessing/text/token_selector/token_selector.h
91
+ - lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
92
+ - lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.h
93
+ - lib/quarry/src/preprocessing/text/tokeniser/tokeniser.h
94
+ - lib/quarry/src/quarry.cpp
95
+ - lib/quarry/src/quarry.h
96
+ - lib/quarry/src/storage/arff.cpp
97
+ - lib/quarry/src/storage/arff.h
98
+ - lib/quarry/src/storage/binary.cpp
99
+ - lib/quarry/src/storage/binary.h
100
+ - lib/quarry/src/storage/folders.cpp
101
+ - lib/quarry/src/storage/folders.h
102
+ - lib/quarry/src/storage/storage.h
103
+ - lib/quarry/src/test.cpp
104
+ - lib/quarry_rb/classifier/classifier.rb
105
+ - lib/quarry_rb/classifier/naive_bayes_classifier.rb
106
+ - lib/quarry_rb/confusion_matrix.rb
107
+ - lib/quarry_rb/data_set/data_set.rb
108
+ - lib/quarry_rb/data_set/example.rb
109
+ - lib/quarry_rb/data_set/feature.rb
110
+ - lib/quarry_rb/enumerable_helper.rb
111
+ - lib/quarry_rb/model/model.rb
112
+ - lib/quarry_rb/storage/arff.rb
113
+ - lib/quarry_rb/storage/binary.rb
114
+ - lib/quarry_rb/storage/folders.rb
115
+ - lib/quarry_rb/text_pipeline.rb
116
+ - lib/thera.rb
117
+ - test/helper.rb
118
+ - test/test_quarry.rb
119
+ - thera.gemspec
120
+ homepage: http://github.com/willcannings/quarry
121
+ licenses:
122
+ - Public Domain
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ - ext
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ requirements: []
141
+ rubyforge_project:
142
+ rubygems_version: 1.8.10
143
+ signing_key:
144
+ specification_version: 3
145
+ summary: Ruby Data Mining Library
146
+ test_files:
147
+ - test/helper.rb
148
+ - test/test_quarry.rb