treat 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. data/LICENSE +1 -1
  2. data/README.md +3 -3
  3. data/lib/treat/config.rb +10 -0
  4. data/lib/treat/core/data_set.rb +80 -32
  5. data/lib/treat/core/feature.rb +35 -0
  6. data/lib/treat/core/problem.rb +43 -0
  7. data/lib/treat/core/question.rb +27 -0
  8. data/lib/treat/entities/abilities/buildable.rb +5 -3
  9. data/lib/treat/entities/abilities/exportable.rb +4 -4
  10. data/lib/treat/entities/collection.rb +1 -1
  11. data/lib/treat/entities/document.rb +1 -1
  12. data/lib/treat/entities/group.rb +8 -5
  13. data/lib/treat/entities/section.rb +1 -1
  14. data/lib/treat/entities/token.rb +20 -8
  15. data/lib/treat/entities/zone.rb +6 -5
  16. data/lib/treat/loaders/linguistics.rb +18 -19
  17. data/lib/treat/loaders/stanford.rb +3 -2
  18. data/lib/treat/version.rb +1 -1
  19. data/lib/treat/workers/extractors/language/what_language.rb +53 -57
  20. data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -5
  21. data/lib/treat/workers/formatters/serializers/mongo.rb +33 -27
  22. data/lib/treat/workers/formatters/unserializers/mongo.rb +14 -36
  23. data/lib/treat/workers/learners/classifiers/id3.rb +4 -5
  24. data/lib/treat/workers/learners/classifiers/mlp.rb +1 -1
  25. data/lib/treat/workers.rb +1 -1
  26. data/spec/entity.rb +7 -5
  27. data/spec/phrase.rb +2 -2
  28. data/spec/zone.rb +2 -3
  29. metadata +37 -15
  30. data/bin/stanford/bridge.jar +0 -0
  31. data/bin/stanford/joda-time.jar +0 -0
  32. data/bin/stanford/stanford-corenlp.jar +0 -0
  33. data/bin/stanford/stanford-parser.jar +0 -0
  34. data/bin/stanford/xom.jar +0 -0
  35. data/files/21552208.html +0 -683
  36. data/files/3_2_release_notes.html +0 -766
  37. data/files/nethttp-cheat-sheet-2940.html +0 -395
  38. data/files/weather-central-canada-heat-wave.html +0 -1370
  39. data/lib/treat/core/classification.rb +0 -63
  40. data/lib/treat/core/server.rb +0 -3
  41. data/spec/sandbox.rb +0 -223
  42. data/tmp/english.yaml +0 -10340
data/lib/treat/workers/formatters/serializers/mongo.rb CHANGED
@@ -6,55 +6,61 @@ class Treat::Workers::Formatters::Serializers::Mongo
6
6
 
7
7
  DefaultOptions = {
8
8
  :recursive => true,
9
- :stop_at => :token
9
+ :stop_at => nil
10
10
  }
11
-
11
+
12
12
  def self.serialize(entity, options = {})
13
-
13
+
14
14
  options = DefaultOptions.merge(options)
15
- stop_at = options[:stop_at] ?
15
+ options[:stop_at] = options[:stop_at] ?
16
16
  Treat::Entities.const_get(
17
- options[:stop_at].to_s.capitalize) :
18
- Treat::Entities::Token
19
-
17
+ options[:stop_at].to_s.capitalize) : nil
18
+
20
19
  if !Treat.databases.mongo.db && !options[:db]
21
20
  raise Treat::Exception,
22
21
  'Must supply the database name in config. ' +
23
22
  '(Treat.databases.mongo.db = ...) or pass ' +
24
23
  'it as a parameter to #serialize.'
25
24
  end
26
-
25
+
27
26
  @@database ||= Mongo::Connection.
28
27
  new(Treat.databases.mongo.host).
29
28
  db(Treat.databases.mongo.db || options[:db])
30
-
31
- type = cl(entity.class.superclass).downcase
32
- type = entity.type.to_s if type == 'entity'
33
- types = type + 's'
34
29
 
35
- coll = @@database.collection(types)
36
-
30
+ supertype = cl(Treat::Entities.const_get(
31
+ entity.type.to_s.capitalize.intern).superclass).downcase
32
+ supertype = entity.type.to_s if supertype == 'entity'
33
+ supertypes = supertype + 's'
34
+
35
+ coll = @@database.collection(supertypes)
36
+ entity_token = self.do_serialize(entity, options)
37
+ coll.update({id: entity.id}, entity_token, {upsert: true})
38
+ end
39
+
40
+ def self.do_serialize(entity, options)
41
+
42
+ children = []
43
+
44
+ if options[:recursive] && entity.has_children?
45
+ entity.each do |child|
46
+ next if options[:stop_at] && child.class.
47
+ compare_with(options[:stop_at]) < 0
48
+ children << self.do_serialize(child, options)
49
+ end
50
+ end
51
+
37
52
  entity_token = {
38
53
  :id => entity.id,
39
54
  :value => entity.value,
40
55
  :string => entity.to_s,
41
- :type => entity.type,
42
- :children => entity.children.map { |c| [c.id, c.type] },
56
+ :type => entity.type.to_s,
57
+ :children => children,
43
58
  :parent => (entity.has_parent? ? entity.parent.id : nil),
44
59
  :features => entity.features
45
60
  }
46
-
47
- coll.insert(entity_token)
48
61
 
49
- if options[:recursive] && entity.has_children?
50
- entity.each do |child|
51
- next if child.class.compare_with(stop_at) < 0
52
- self.serialize(child, options)
53
- end
54
- end
55
-
62
+ entity_token
63
+
56
64
  end
57
65
 
58
66
  end
59
-
60
-
data/lib/treat/workers/formatters/unserializers/mongo.rb CHANGED
@@ -1,20 +1,9 @@
1
1
  module Treat::Workers::Formatters::Unserializers::Mongo
2
-
3
- DefaultOptions = {
4
- :recursive => true,
5
- :stop_at => nil
6
- }
7
2
 
8
3
  require 'mongo'
9
4
 
10
5
  def self.unserialize(entity, options={})
11
6
 
12
- options = DefaultOptions.merge(options)
13
- options[:stop_at] = options[:stop_at] ?
14
- Treat::Entities.const_get(
15
- options[:stop_at].to_s.capitalize) :
16
- Treat::Entities::Token
17
-
18
7
  if !Treat.databases.mongo.db && !options[:db]
19
8
  raise Treat::Exception,
20
9
  'Must supply the database name in config. ' +
@@ -25,12 +14,6 @@ module Treat::Workers::Formatters::Unserializers::Mongo
25
14
  @@database ||= Mongo::Connection.
26
15
  new(Treat.databases.mongo.host).
27
16
  db(Treat.databases.mongo.db || options[:db])
28
-
29
- self.do_unserialize(entity, options)
30
-
31
- end
32
-
33
- def self.do_unserialize(entity, options)
34
17
 
35
18
  supertype = cl(Treat::Entities.const_get(
36
19
  entity.type.to_s.capitalize.intern).superclass).downcase
@@ -39,38 +22,33 @@ module Treat::Workers::Formatters::Unserializers::Mongo
39
22
 
40
23
  coll = @@database.collection(supertypes)
41
24
  record = coll.find_one(:id => entity.id)
42
-
25
+
43
26
  unless record
44
27
  raise Treat::Exception,
45
28
  "Couldn't find record ID #{entity.id}."
46
29
  end
30
+
31
+ self.do_unserialize(record, options)
32
+
33
+ end
34
+
35
+ def self.do_unserialize(record, options)
36
+
37
+ entity = Treat::Entities.
38
+ const_get(record['type'].
39
+ capitalize.intern).new(
40
+ record['value'], record['id'])
47
41
 
48
- # Convert feature keys to symbols.
49
42
  features = record['features']
50
43
  new_feat = {}
51
44
  features.each do |feature, value|
52
45
  new_feat[feature.intern] = value
53
46
  end
54
- entity.features = new_feat
55
47
 
56
- # Set the entity's value.
57
- entity.value = record['value']
58
-
59
- if entity.class.compare_with(
60
- options[:stop_at]) == 0
61
- entity.value = record['string']
62
- end
48
+ entity.features = new_feat
63
49
 
64
- return entity unless options[:recursive]
65
-
66
50
  record['children'].each do |c|
67
- cid, ctype = *c
68
- cklass = Treat::Entities.const_get(
69
- ctype.capitalize.intern)
70
- next if cklass.compare_with(
71
- options[:stop_at]) < 0
72
- entity << self.do_unserialize(
73
- cklass.new('', cid), options)
51
+ entity << self.do_unserialize(c, options)
74
52
  end
75
53
 
76
54
  entity
data/lib/treat/workers/learners/classifiers/id3.rb CHANGED
@@ -7,22 +7,21 @@ class Treat::Workers::Learners::Classifiers::ID3
7
7
  def self.classify(entity, options = {})
8
8
 
9
9
  set = options[:training]
10
- cl = set.classification
10
+ cl = set.problem
11
11
 
12
12
  if !@@classifiers[cl]
13
13
  dec_tree = DecisionTree::ID3Tree.new(
14
- set.labels.map { |l| l.to_s }, set.items,
15
- cl.default, cl.mode)
14
+ cl.labels.map { |l| l.to_s }, set.items, cl.question.default,
15
+ cl.question.type)
16
16
  dec_tree.train
17
17
  @@classifiers[cl] = dec_tree
18
18
  else
19
19
  dec_tree = @@classifiers[cl]
20
+ dec_tree.graph('testingbitch')
20
21
  end
21
-
22
22
  dec_tree.predict(
23
23
  cl.export_item(entity, false)
24
24
  )
25
-
26
25
  end
27
26
 
28
27
  end
data/lib/treat/workers/learners/classifiers/mlp.rb CHANGED
@@ -8,7 +8,7 @@ class Treat::Workers::Learners::Classifiers::MLP
8
8
  def self.classify(entity, options = {})
9
9
 
10
10
  set = options[:training]
11
- cl = set.classification
11
+ cl = set.problem
12
12
 
13
13
  if !@@mlps[cl]
14
14
  net = Ai4r::NeuralNetwork::
data/lib/treat/workers.rb CHANGED
@@ -34,7 +34,7 @@ module Treat::Workers
34
34
  def self.create_category(name, conf)
35
35
  category = self.const_set(name, Module.new)
36
36
  conf.each_pair do |group, worker|
37
- name = group.to_s.capitalize.intern
37
+ name = cc(group.to_s).intern
38
38
  category.module_eval do
39
39
  @@methods = []; def methods;
40
40
  @@methods; end; def groups;
data/spec/entity.rb CHANGED
@@ -113,12 +113,14 @@ describe Treat::Entities::Entity do
113
113
  describe "Exportable" do
114
114
 
115
115
  context "when supplied with a classification to export" do
116
- classification = Treat::Core::Classification.new(:word, :tag, :is_keyword)
116
+ feature = Treat::Core::Feature.new(:tag)
117
+ question = Treat::Core::Question.new(:is_keyword, :word, :discrete, false)
118
+ problem = Treat::Core::Problem.new(question, feature)
117
119
  it "returns a data set with the exported features" do
118
- ds = @sentence.export(classification)
119
- ds.classification.should eql classification
120
- ds.labels.should eql [:tag]
121
- ds.ids.should eql @sentence.words.map { |w| w.id }
120
+ ds = @sentence.export(problem)
121
+ ds.problem.should eql problem
122
+ ds.problem.labels.should eql [:tag]
123
+ ds.entities.should eql @sentence.words.map { |w| w.id }
122
124
  ds.items.should eql [
123
125
  ["DT", false], ["JJ", false],
124
126
  ["NN", false], ["VBZ", false],
data/spec/phrase.rb CHANGED
@@ -42,7 +42,7 @@ describe Treat::Entities::Phrase do
42
42
 
43
43
  describe "#time" do
44
44
  it "returns a DateTime object representing the time in the phrase" do
45
- Treat.languages.english[:workers][:extractors][:time].each do |e|
45
+ Treat.languages.english.workers.extractors.time.each do |e|
46
46
  t = 'october 2006'.time(e)
47
47
  t.month.should eql 10
48
48
  end
@@ -55,7 +55,7 @@ describe Treat::Entities::Phrase do
55
55
  describe "#tokenize" do
56
56
 
57
57
  it "splits a phrase/sentence into tokens and adds them as children of the phrase" do
58
- Treat.languages.english[:workers][:processors][:tokenizers].each do |t|
58
+ Treat.languages.english.workers.processors.tokenizers.each do |t|
59
59
  @phrase = Treat::Entities::Phrase.new('a phrase to tokenize')
60
60
  @phrase.tokenize(t)
61
61
  @phrase.children.should eql @phrase.tokens
data/spec/zone.rb CHANGED
@@ -5,13 +5,12 @@ describe Treat::Entities::Zone do
5
5
  describe "Processable" do
6
6
 
7
7
  before do
8
- @processors = Treat.languages.
9
- english[:workers][:processors]
8
+ @processors = Treat.languages.english.workers.processors
10
9
  end
11
10
  describe "#segment" do
12
11
 
13
12
  it "splits a zone into phrases/sentences and adds them as children of the zone" do
14
- @processors[:segmenters].each do |s|
13
+ @processors.segmenters.each do |s|
15
14
  paragraph = Treat::Entities::Paragraph.new(
16
15
  "This is a first sentence inside the first paragraph. " +
17
16
  "This is the second sentence that is inside the paragraph.")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-11 00:00:00.000000000 Z
12
+ date: 2012-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: schiphol
@@ -27,6 +27,38 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: birch
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: sourcify
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
30
62
  - !ruby/object:Gem::Dependency
31
63
  name: rspec
32
64
  requirement: !ruby/object:Gem::Requirement
@@ -67,11 +99,6 @@ extensions: []
67
99
  extra_rdoc_files: []
68
100
  files:
69
101
  - bin/MANIFEST
70
- - bin/stanford/bridge.jar
71
- - bin/stanford/joda-time.jar
72
- - bin/stanford/stanford-corenlp.jar
73
- - bin/stanford/stanford-parser.jar
74
- - bin/stanford/xom.jar
75
102
  - lib/treat/config/core/acronyms.rb
76
103
  - lib/treat/config/core/encodings.rb
77
104
  - lib/treat/config/core/entities.rb
@@ -110,10 +137,11 @@ files:
110
137
  - lib/treat/config/workers/processors.rb
111
138
  - lib/treat/config/workers/retrievers.rb
112
139
  - lib/treat/config.rb
113
- - lib/treat/core/classification.rb
114
140
  - lib/treat/core/data_set.rb
141
+ - lib/treat/core/feature.rb
115
142
  - lib/treat/core/node.rb
116
- - lib/treat/core/server.rb
143
+ - lib/treat/core/problem.rb
144
+ - lib/treat/core/question.rb
117
145
  - lib/treat/core.rb
118
146
  - lib/treat/entities/abilities/buildable.rb
119
147
  - lib/treat/entities/abilities/checkable.rb
@@ -224,18 +252,12 @@ files:
224
252
  - spec/samples/mathematicians/gauss.pdf
225
253
  - spec/samples/mathematicians/leibniz.txt
226
254
  - spec/samples/mathematicians/newton.doc
227
- - spec/sandbox.rb
228
255
  - spec/token.rb
229
256
  - spec/treat.rb
230
257
  - spec/word.rb
231
258
  - spec/zone.rb
232
- - tmp/english.yaml
233
259
  - tmp/MANIFEST
234
- - files/21552208.html
235
- - files/3_2_release_notes.html
236
260
  - files/MANIFEST
237
- - files/nethttp-cheat-sheet-2940.html
238
- - files/weather-central-canada-heat-wave.html
239
261
  - README.md
240
262
  - LICENSE
241
263
  homepage: https://github.com/louismullie/treat
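data/bin/stanford/bridge.jar DELETED
Binary file
data/bin/stanford/joda-time.jar DELETED
Binary file
data/bin/stanford/stanford-corenlp.jar DELETED
Binary file
data/bin/stanford/stanford-parser.jar DELETED
Binary file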
data/bin/stanford/xom.jar DELETED
Binary file