treat 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/LICENSE +1 -1
  2. data/README.md +3 -3
  3. data/lib/treat/config.rb +10 -0
  4. data/lib/treat/core/data_set.rb +80 -32
  5. data/lib/treat/core/feature.rb +35 -0
  6. data/lib/treat/core/problem.rb +43 -0
  7. data/lib/treat/core/question.rb +27 -0
  8. data/lib/treat/entities/abilities/buildable.rb +5 -3
  9. data/lib/treat/entities/abilities/exportable.rb +4 -4
  10. data/lib/treat/entities/collection.rb +1 -1
  11. data/lib/treat/entities/document.rb +1 -1
  12. data/lib/treat/entities/group.rb +8 -5
  13. data/lib/treat/entities/section.rb +1 -1
  14. data/lib/treat/entities/token.rb +20 -8
  15. data/lib/treat/entities/zone.rb +6 -5
  16. data/lib/treat/loaders/linguistics.rb +18 -19
  17. data/lib/treat/loaders/stanford.rb +3 -2
  18. data/lib/treat/version.rb +1 -1
  19. data/lib/treat/workers/extractors/language/what_language.rb +53 -57
  20. data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -5
  21. data/lib/treat/workers/formatters/serializers/mongo.rb +33 -27
  22. data/lib/treat/workers/formatters/unserializers/mongo.rb +14 -36
  23. data/lib/treat/workers/learners/classifiers/id3.rb +4 -5
  24. data/lib/treat/workers/learners/classifiers/mlp.rb +1 -1
  25. data/lib/treat/workers.rb +1 -1
  26. data/spec/entity.rb +7 -5
  27. data/spec/phrase.rb +2 -2
  28. data/spec/zone.rb +2 -3
  29. metadata +37 -15
  30. data/bin/stanford/bridge.jar +0 -0
  31. data/bin/stanford/joda-time.jar +0 -0
  32. data/bin/stanford/stanford-corenlp.jar +0 -0
  33. data/bin/stanford/stanford-parser.jar +0 -0
  34. data/bin/stanford/xom.jar +0 -0
  35. data/files/21552208.html +0 -683
  36. data/files/3_2_release_notes.html +0 -766
  37. data/files/nethttp-cheat-sheet-2940.html +0 -395
  38. data/files/weather-central-canada-heat-wave.html +0 -1370
  39. data/lib/treat/core/classification.rb +0 -63
  40. data/lib/treat/core/server.rb +0 -3
  41. data/spec/sandbox.rb +0 -223
  42. data/tmp/english.yaml +0 -10340
@@ -6,55 +6,61 @@ class Treat::Workers::Formatters::Serializers::Mongo
6
6
 
7
7
  DefaultOptions = {
8
8
  :recursive => true,
9
- :stop_at => :token
9
+ :stop_at => nil
10
10
  }
11
-
11
+
12
12
  def self.serialize(entity, options = {})
13
-
13
+
14
14
  options = DefaultOptions.merge(options)
15
- stop_at = options[:stop_at] ?
15
+ options[:stop_at] = options[:stop_at] ?
16
16
  Treat::Entities.const_get(
17
- options[:stop_at].to_s.capitalize) :
18
- Treat::Entities::Token
19
-
17
+ options[:stop_at].to_s.capitalize) : nil
18
+
20
19
  if !Treat.databases.mongo.db && !options[:db]
21
20
  raise Treat::Exception,
22
21
  'Must supply the database name in config. ' +
23
22
  '(Treat.databases.mongo.db = ...) or pass ' +
24
23
  'it as a parameter to #serialize.'
25
24
  end
26
-
25
+
27
26
  @@database ||= Mongo::Connection.
28
27
  new(Treat.databases.mongo.host).
29
28
  db(Treat.databases.mongo.db || options[:db])
30
-
31
- type = cl(entity.class.superclass).downcase
32
- type = entity.type.to_s if type == 'entity'
33
- types = type + 's'
34
29
 
35
- coll = @@database.collection(types)
36
-
30
+ supertype = cl(Treat::Entities.const_get(
31
+ entity.type.to_s.capitalize.intern).superclass).downcase
32
+ supertype = entity.type.to_s if supertype == 'entity'
33
+ supertypes = supertype + 's'
34
+
35
+ coll = @@database.collection(supertypes)
36
+ entity_token = self.do_serialize(entity, options)
37
+ coll.update({id: entity.id}, entity_token, {upsert: true})
38
+ end
39
+
40
+ def self.do_serialize(entity, options)
41
+
42
+ children = []
43
+
44
+ if options[:recursive] && entity.has_children?
45
+ entity.each do |child|
46
+ next if options[:stop_at] && child.class.
47
+ compare_with(options[:stop_at]) < 0
48
+ children << self.do_serialize(child, options)
49
+ end
50
+ end
51
+
37
52
  entity_token = {
38
53
  :id => entity.id,
39
54
  :value => entity.value,
40
55
  :string => entity.to_s,
41
- :type => entity.type,
42
- :children => entity.children.map { |c| [c.id, c.type] },
56
+ :type => entity.type.to_s,
57
+ :children => children,
43
58
  :parent => (entity.has_parent? ? entity.parent.id : nil),
44
59
  :features => entity.features
45
60
  }
46
-
47
- coll.insert(entity_token)
48
61
 
49
- if options[:recursive] && entity.has_children?
50
- entity.each do |child|
51
- next if child.class.compare_with(stop_at) < 0
52
- self.serialize(child, options)
53
- end
54
- end
55
-
62
+ entity_token
63
+
56
64
  end
57
65
 
58
66
  end
59
-
60
-
@@ -1,20 +1,9 @@
1
1
  module Treat::Workers::Formatters::Unserializers::Mongo
2
-
3
- DefaultOptions = {
4
- :recursive => true,
5
- :stop_at => nil
6
- }
7
2
 
8
3
  require 'mongo'
9
4
 
10
5
  def self.unserialize(entity, options={})
11
6
 
12
- options = DefaultOptions.merge(options)
13
- options[:stop_at] = options[:stop_at] ?
14
- Treat::Entities.const_get(
15
- options[:stop_at].to_s.capitalize) :
16
- Treat::Entities::Token
17
-
18
7
  if !Treat.databases.mongo.db && !options[:db]
19
8
  raise Treat::Exception,
20
9
  'Must supply the database name in config. ' +
@@ -25,12 +14,6 @@ module Treat::Workers::Formatters::Unserializers::Mongo
25
14
  @@database ||= Mongo::Connection.
26
15
  new(Treat.databases.mongo.host).
27
16
  db(Treat.databases.mongo.db || options[:db])
28
-
29
- self.do_unserialize(entity, options)
30
-
31
- end
32
-
33
- def self.do_unserialize(entity, options)
34
17
 
35
18
  supertype = cl(Treat::Entities.const_get(
36
19
  entity.type.to_s.capitalize.intern).superclass).downcase
@@ -39,38 +22,33 @@ module Treat::Workers::Formatters::Unserializers::Mongo
39
22
 
40
23
  coll = @@database.collection(supertypes)
41
24
  record = coll.find_one(:id => entity.id)
42
-
25
+
43
26
  unless record
44
27
  raise Treat::Exception,
45
28
  "Couldn't find record ID #{entity.id}."
46
29
  end
30
+
31
+ self.do_unserialize(record, options)
32
+
33
+ end
34
+
35
+ def self.do_unserialize(record, options)
36
+
37
+ entity = Treat::Entities.
38
+ const_get(record['type'].
39
+ capitalize.intern).new(
40
+ record['value'], record['id'])
47
41
 
48
- # Convert feature keys to symbols.
49
42
  features = record['features']
50
43
  new_feat = {}
51
44
  features.each do |feature, value|
52
45
  new_feat[feature.intern] = value
53
46
  end
54
- entity.features = new_feat
55
47
 
56
- # Set the entity's value.
57
- entity.value = record['value']
58
-
59
- if entity.class.compare_with(
60
- options[:stop_at]) == 0
61
- entity.value = record['string']
62
- end
48
+ entity.features = new_feat
63
49
 
64
- return entity unless options[:recursive]
65
-
66
50
  record['children'].each do |c|
67
- cid, ctype = *c
68
- cklass = Treat::Entities.const_get(
69
- ctype.capitalize.intern)
70
- next if cklass.compare_with(
71
- options[:stop_at]) < 0
72
- entity << self.do_unserialize(
73
- cklass.new('', cid), options)
51
+ entity << self.do_unserialize(c, options)
74
52
  end
75
53
 
76
54
  entity
@@ -7,22 +7,21 @@ class Treat::Workers::Learners::Classifiers::ID3
7
7
  def self.classify(entity, options = {})
8
8
 
9
9
  set = options[:training]
10
- cl = set.classification
10
+ cl = set.problem
11
11
 
12
12
  if !@@classifiers[cl]
13
13
  dec_tree = DecisionTree::ID3Tree.new(
14
- set.labels.map { |l| l.to_s }, set.items,
15
- cl.default, cl.mode)
14
+ cl.labels.map { |l| l.to_s }, set.items, cl.question.default,
15
+ cl.question.type)
16
16
  dec_tree.train
17
17
  @@classifiers[cl] = dec_tree
18
18
  else
19
19
  dec_tree = @@classifiers[cl]
20
+ dec_tree.graph('testingbitch')
20
21
  end
21
-
22
22
  dec_tree.predict(
23
23
  cl.export_item(entity, false)
24
24
  )
25
-
26
25
  end
27
26
 
28
27
  end
@@ -8,7 +8,7 @@ class Treat::Workers::Learners::Classifiers::MLP
8
8
  def self.classify(entity, options = {})
9
9
 
10
10
  set = options[:training]
11
- cl = set.classification
11
+ cl = set.problem
12
12
 
13
13
  if !@@mlps[cl]
14
14
  net = Ai4r::NeuralNetwork::
data/lib/treat/workers.rb CHANGED
@@ -34,7 +34,7 @@ module Treat::Workers
34
34
  def self.create_category(name, conf)
35
35
  category = self.const_set(name, Module.new)
36
36
  conf.each_pair do |group, worker|
37
- name = group.to_s.capitalize.intern
37
+ name = cc(group.to_s).intern
38
38
  category.module_eval do
39
39
  @@methods = []; def methods;
40
40
  @@methods; end; def groups;
data/spec/entity.rb CHANGED
@@ -113,12 +113,14 @@ describe Treat::Entities::Entity do
113
113
  describe "Exportable" do
114
114
 
115
115
  context "when supplied with a classification to export" do
116
- classification = Treat::Core::Classification.new(:word, :tag, :is_keyword)
116
+ feature = Treat::Core::Feature.new(:tag)
117
+ question = Treat::Core::Question.new(:is_keyword, :word, :discrete, false)
118
+ problem = Treat::Core::Problem.new(question, feature)
117
119
  it "returns a data set with the exported features" do
118
- ds = @sentence.export(classification)
119
- ds.classification.should eql classification
120
- ds.labels.should eql [:tag]
121
- ds.ids.should eql @sentence.words.map { |w| w.id }
120
+ ds = @sentence.export(problem)
121
+ ds.problem.should eql problem
122
+ ds.problem.labels.should eql [:tag]
123
+ ds.entities.should eql @sentence.words.map { |w| w.id }
122
124
  ds.items.should eql [
123
125
  ["DT", false], ["JJ", false],
124
126
  ["NN", false], ["VBZ", false],
data/spec/phrase.rb CHANGED
@@ -42,7 +42,7 @@ describe Treat::Entities::Phrase do
42
42
 
43
43
  describe "#time" do
44
44
  it "returns a DateTime object representing the time in the phrase" do
45
- Treat.languages.english[:workers][:extractors][:time].each do |e|
45
+ Treat.languages.english.workers.extractors.time.each do |e|
46
46
  t = 'october 2006'.time(e)
47
47
  t.month.should eql 10
48
48
  end
@@ -55,7 +55,7 @@ describe Treat::Entities::Phrase do
55
55
  describe "#tokenize" do
56
56
 
57
57
  it "splits a phrase/sentence into tokens and adds them as children of the phrase" do
58
- Treat.languages.english[:workers][:processors][:tokenizers].each do |t|
58
+ Treat.languages.english.workers.processors.tokenizers.each do |t|
59
59
  @phrase = Treat::Entities::Phrase.new('a phrase to tokenize')
60
60
  @phrase.tokenize(t)
61
61
  @phrase.children.should eql @phrase.tokens
data/spec/zone.rb CHANGED
@@ -5,13 +5,12 @@ describe Treat::Entities::Zone do
5
5
  describe "Processable" do
6
6
 
7
7
  before do
8
- @processors = Treat.languages.
9
- english[:workers][:processors]
8
+ @processors = Treat.languages.english.workers.processors
10
9
  end
11
10
  describe "#segment" do
12
11
 
13
12
  it "splits a zone into phrases/sentences and adds them as children of the zone" do
14
- @processors[:segmenters].each do |s|
13
+ @processors.segmenters.each do |s|
15
14
  paragraph = Treat::Entities::Paragraph.new(
16
15
  "This is a first sentence inside the first paragraph. " +
17
16
  "This is the second sentence that is inside the paragraph.")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-11 00:00:00.000000000 Z
12
+ date: 2012-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: schiphol
@@ -27,6 +27,38 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: birch
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: sourcify
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
30
62
  - !ruby/object:Gem::Dependency
31
63
  name: rspec
32
64
  requirement: !ruby/object:Gem::Requirement
@@ -67,11 +99,6 @@ extensions: []
67
99
  extra_rdoc_files: []
68
100
  files:
69
101
  - bin/MANIFEST
70
- - bin/stanford/bridge.jar
71
- - bin/stanford/joda-time.jar
72
- - bin/stanford/stanford-corenlp.jar
73
- - bin/stanford/stanford-parser.jar
74
- - bin/stanford/xom.jar
75
102
  - lib/treat/config/core/acronyms.rb
76
103
  - lib/treat/config/core/encodings.rb
77
104
  - lib/treat/config/core/entities.rb
@@ -110,10 +137,11 @@ files:
110
137
  - lib/treat/config/workers/processors.rb
111
138
  - lib/treat/config/workers/retrievers.rb
112
139
  - lib/treat/config.rb
113
- - lib/treat/core/classification.rb
114
140
  - lib/treat/core/data_set.rb
141
+ - lib/treat/core/feature.rb
115
142
  - lib/treat/core/node.rb
116
- - lib/treat/core/server.rb
143
+ - lib/treat/core/problem.rb
144
+ - lib/treat/core/question.rb
117
145
  - lib/treat/core.rb
118
146
  - lib/treat/entities/abilities/buildable.rb
119
147
  - lib/treat/entities/abilities/checkable.rb
@@ -224,18 +252,12 @@ files:
224
252
  - spec/samples/mathematicians/gauss.pdf
225
253
  - spec/samples/mathematicians/leibniz.txt
226
254
  - spec/samples/mathematicians/newton.doc
227
- - spec/sandbox.rb
228
255
  - spec/token.rb
229
256
  - spec/treat.rb
230
257
  - spec/word.rb
231
258
  - spec/zone.rb
232
- - tmp/english.yaml
233
259
  - tmp/MANIFEST
234
- - files/21552208.html
235
- - files/3_2_release_notes.html
236
260
  - files/MANIFEST
237
- - files/nethttp-cheat-sheet-2940.html
238
- - files/weather-central-canada-heat-wave.html
239
261
  - README.md
240
262
  - LICENSE
241
263
  homepage: https://github.com/louismullie/treat
Binary file
Binary file
Binary file
Binary file
data/bin/stanford/xom.jar DELETED
Binary file