treat 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,53 @@
module Treat
  # A Feature holds a hash of candidate values mapped to probabilities
  # and behaves, for every method it does not define itself, like its
  # most probable value.
  class Feature
    # Undefine every inherited public/protected instance method whose
    # name does not start with one of the prefixes below, so that such
    # calls fall through to method_missing and reach the wrapped object.
    instance_methods.each do |meth|
      undef_method(meth) if meth.to_s !~
        /^(__|object_id|class|instance_variables|instance_variable_get)/
    end

    # p_hash: the (possibly normalized) value => probability hash.
    # values: the candidate values (the keys of p_hash).
    # best:   the value with the highest probability.
    attr_reader :p_hash, :values, :best

    # Initialize the feature with a hash of values => probabilities.
    #
    # When several values share the highest probability, one of them
    # is picked at random (Array#sample).
    def initialize(p_hash)
      @p_hash = p_hash
      normalize
      max = @p_hash.values.max
      @best = @p_hash.select { |_, prob| prob == max }.keys.sample
      @values = @p_hash.keys
      type = @values[0].class
      # Classes such as Symbol, NilClass, Integer or Float cannot be
      # instantiated with .new; wrap the best value directly instead
      # of raising NoMethodError.
      @object =
        if type == ::Symbol || type == ::NilClass || !type.respond_to?(:new)
          @best
        else
          type.new(@best)
        end
    end

    # Normalize the probabilities so that they sum to one, unless the
    # sum is already at or below one (in which case the feature is
    # assumed to be intentionally incomplete and is left untouched).
    def normalize
      sum = @p_hash.inject(0.0) { |total, pair| total + pair[1] }
      return if sum <= 1.0
      normalized = {}
      @p_hash.each { |k, v| normalized[k] = v.to_f / sum.to_f }
      @p_hash = normalized
    end

    # Find the probability of value x (0 when x is not a known value).
    def probability(x)
      @p_hash[x] ? @p_hash[x] : 0
    end

    # Alias for probability: p(x).
    alias :p :probability

    # Forward every method not explicitly defined here to the wrapped
    # best value.
    def method_missing(sym, *args, &block)
      @object.send(sym, *args, &block)
    end
  end
end
@@ -0,0 +1,44 @@
module Treat
  # Formatters convert entities to and from external file formats.
  # Each nested module is a Group that declares its type, the entity
  # kinds it targets, and the worker used by default.
  module Formatters

    # Readers read a document and build the top-level entity that
    # corresponds to the document's content.
    module Readers
      extend Group
      self.type    = :transformer
      self.targets = [:collection, :document]
      self.default = :autoselect
    end

    # Unserializers rebuild entities from a serialized representation.
    module Unserializers
      extend Group
      self.type    = :transformer
      self.targets = [:collection, :document]
      self.default = :autoselect
    end

    # Visualizers turn entities into a format meant for viewing.
    module Visualizers
      extend Group
      self.type    = :computer
      self.targets = [:entity]
      self.default = :tree
    end

    # Serializers turn entities into a format meant for storage.
    module Serializers
      extend Group
      self.type    = :computer
      self.targets = [:entity]
      self.default = :yaml
    end

    # Cleaners annotate documents; the default cleaner is :html.
    # (Presumably they strip format-specific markup from the text --
    # confirm against the individual cleaner classes.)
    module Cleaners
      extend Group
      self.type    = :annotator
      self.targets = [:document]
      self.default = :html
    end

    extend Treat::Category
  end
end
44
+
@@ -0,0 +1,17 @@
module Treat
  module Formatters
    module Cleaners
      # Replaces the HTML value of each text in a document with the
      # inner text extracted by Hpricot.
      class HTML
        # Load the Hpricot HTML parser inside the project's
        # `silently` helper.
        silently { require 'hpricot' }

        # For every text yielded by document.each_text, store the
        # current value under the :html_value feature, then overwrite
        # the value with Hpricot's inner text for it.
        #
        # Returns the document.
        def self.clean(document, options = {})
          document.each_text do |text|
            text.set(:html_value, text.value)
            text.value = Hpricot(text.value).inner_text
          end
          document
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Treat
  module Formatters
    module Readers
      # This class isn't a wrapper for anything. It simply delegates
      # the reading task to the appropriate reader based on the file
      # extension of the supplied document.
      class Autoselect
        # Image extensions that are routed to an OCR engine.
        ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']

        # Select the appropriate reader based on the extension of the
        # filename in document.file (matched case-insensitively) and
        # return whatever that reader's .read returns.
        #
        # Options:
        # :ocr => :ocropus | :gocr (the OCR engine used for images;
        #         defaults to :ocropus when absent).
        #
        # Raises Treat::Exception when no reader constant can be found
        # for the extension.
        def self.read(document, options = {:ocr => :ocropus})
          ext = document.file.split('.')[-1].to_s.downcase
          reader =
            if ImageExtensions.include?(ext)
              # Honor the documented :ocr option instead of always
              # routing images to Ocropus.
              (options[:ocr] || :ocropus).to_s
            else
              ext
            end
          begin
            r = Treat::Formatters::Readers.const_get(cc(reader))
          rescue NameError
            raise Treat::Exception,
              "Cannot find a default reader for format: '#{ext}'."
          end
          r.read(document, options)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
module Treat
  module Formatters
    module Readers
      # A wrapper class for the GOCR engine.
      #
      # "GOCR is an OCR (Optical Character Recognition)
      # program, developed under the GNU Public License.
      # It converts scanned images of text back to text files."
      #
      # Project site: http://jocr.sourceforge.net
      class GOCR
        # Shellwords is used to quote paths passed to the shell.
        require 'shellwords'

        # Read a file using the GOCR reader.
        #
        # The image in document.file is converted to a temporary PGM
        # file with the `convert` command, `gocr` is run on it, and
        # the stripped output is appended to the document as an entity
        # built by Entity.from_string.
        #
        # Returns the document.
        def self.read(document, options = {})
          create_temp_file(:pgm) do |tmp|
            # Escape both paths so that names containing spaces or
            # shell metacharacters are passed through verbatim.
            source = Shellwords.escape(document.file.to_s)
            target = Shellwords.escape(tmp.to_s)
            `convert #{source} #{target}`
            f = `gocr #{target}`.strip
            document << Treat::Entities::Entity.from_string(f)
          end
          document
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Treat
  module Formatters
    module Readers
      # Reads an HTML file into a document.
      class HTML
        # Load the content of document.file, append the entity built
        # from it to the document, then run the :html cleaner on the
        # document.
        #
        # Returns the result of document.clean(:html).
        def self.read(document, options = {})
          markup = File.read(document.file)
          document << Treat::Entities::Entity.from_string(markup)
          document.clean(:html)
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Treat
  module Formatters
    module Readers
      # This class is a wrapper for the Google Ocropus
      # optical character recognition (OCR) engine.
      #
      # "OCRopus(tm) is a state-of-the-art document
      # analysis and OCR system, featuring pluggable
      # layout analysis, pluggable character recognition,
      # statistical natural language modeling, and multi-
      # lingual capabilities."
      #
      # Original paper:
      # Breuel, Thomas M. The Ocropus Open Source OCR System.
      # DFKI and U. Kaiserslautern, Germany.
      class Ocropus
        # Shellwords is used to quote paths passed to the shell.
        require 'shellwords'

        # Read a file using the Google Ocropus reader.
        #
        # Runs `ocropus page` on document.file, redirecting its output
        # to a temporary text file, then appends the entity built from
        # that file's content to the document.
        #
        # Returns the document.
        def self.read(document, options = {})
          create_temp_file(:txt) do |tmp|
            # Escape both paths so that names containing spaces or
            # shell metacharacters are passed through verbatim.
            source = Shellwords.escape(document.file.to_s)
            target = Shellwords.escape(tmp.to_s)
            capture(:stderr) do
              `ocropus page #{source} > #{target} -STDIO 2>/dev/null`
            end
            f = File.read(tmp)
            document << Treat::Entities::Entity.from_string(f)
          end
          document
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Treat
  module Formatters
    module Readers
      # Reads a PDF file by converting it to plain text.
      class PDF
        require 'fileutils'
        # Shellwords is used to quote paths passed to the shell.
        require 'shellwords'

        # Read a file using the `pdftotext` command-line utility.
        #
        # The PDF in document.file is converted into a temporary text
        # file, whose content is appended to the document as an entity
        # built by Entity.from_string.
        #
        # Returns the document.
        def self.read(document, options = {})
          create_temp_file(:txt) do |tmp|
            # Escape both paths so that names containing spaces or
            # shell metacharacters are passed through verbatim.
            source = Shellwords.escape(document.file.to_s)
            target = Shellwords.escape(tmp.to_s)
            `pdftotext #{source} #{target}`
            document << Treat::Entities::Entity.from_string(File.read(tmp))
          end
          document
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
module Treat
  module Formatters
    module Readers
      # Reader for plain text files.
      class Txt
        # Load the content of document.file and append the entity
        # built from that string to the document.
        #
        # Returns the document.
        def self.read(document, options = {})
          contents = File.read(document.file)
          document << Treat::Entities::Entity.from_string(contents)
          document
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
module Treat
  module Formatters
    module Serializers
      # This class converts an entity to XML format.
      class XML
        # Require the Nokogiri XML parser.
        require 'nokogiri'
        # CGI.escapeHTML is used to escape attribute values and text.
        require 'cgi'

        # Serialize an entity tree in XML format.
        #
        # Options:
        # :indent => nesting depth of the entity (default 0). At depth
        #            0 the XML declaration is emitted first. Each level
        #            is indented by one space.
        #
        # The entity's features become attributes of its tag (a feature
        # whose value is an Entity is written as that entity's id).
        # Edges are written as attributes too, but only when the entity
        # has at least one feature.
        # NOTE(review): edges of feature-less entities are dropped --
        # confirm whether that is intended.
        #
        # Attribute values and text content are XML-escaped. The
        # caller's options hash is not modified.
        #
        # Returns the XML as a String.
        def self.serialize(entity, options = {})
          indent = options[:indent] || 0
          spaces = ' ' * indent
          string = ''.dup
          if indent == 0
            string << '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>'
          end

          attributes = ''.dup
          if !entity.features.nil? && entity.features.size != 0
            entity.features.each_pair do |feature, value|
              value = value.id if value.is_a? Entities::Entity
              attributes << " #{feature}='#{CGI.escapeHTML(value.to_s)}'"
            end
            entity.edges.each_pair do |id, edge|
              attributes << " #{edge}='#{CGI.escapeHTML(id.to_s)}'"
            end
          end

          tag = entity.class.to_s.split('::')[-1].downcase
          string << "\n#{spaces}<#{tag}#{attributes}>"
          if entity.has_children?
            # Pass a fresh hash down instead of incrementing and
            # decrementing the shared one.
            child_options = options.merge(:indent => indent + 1)
            entity.children.each do |child|
              string << serialize(child, child_options)
            end
          else
            string << "\n#{spaces}#{CGI.escapeHTML(entity.value.to_s)}"
          end
          string << "\n#{spaces}</#{tag}>"
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
module Treat
  module Formatters
    module Serializers
      # Load the Psych YAML engine.
      require 'psych'

      # Serializer producing YAML output.
      class YAML
        # Dump the given entity with Psych and return the resulting
        # YAML string. The options argument is accepted but not used.
        def self.serialize(entity, options = {})
          ::Psych.dump(entity)
        end
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require 'yaml'
2
+ require 'set'
3
+
class Class
  # The list of names registered on this class for persistence,
  # created empty on first access.
  def persist
    @persist ||= []
  end

  # Replace the list of persisted names; anything that is not an
  # Array is ignored.
  def persist= p
    @persist = p if p.kind_of?(Array)
  end

  # The persisted names of this class followed by those of each
  # superclass in turn, without duplicates.
  def persist_with_parent
    names = []
    klass = self
    while klass
      names.concat(klass.persist)
      klass = klass.superclass
    end
    names.uniq
  end
end
24
+
class Object
  # Register the given names (converted to strings) in this class's
  # persist list, then drop duplicates from that list.
  def self.persistent *var
    names = var.map { |name| name.to_s }
    persist.concat(names)
    persist.uniq!
  end

  # Keep the original to_yaml reachable under another name.
  alias_method :old_to_yaml, :to_yaml

  # Emit YAML for this object. When the object's class (or one of its
  # superclasses) has persisted names, only the instance variables
  # with those names are written; otherwise the original to_yaml is
  # used.
  def to_yaml ( opts = {} )
    persisted = self.class.persist_with_parent
    return old_to_yaml(opts) unless persisted && persisted.size > 0

    yaml_emit(opts) do |map|
      persisted.each do |name|
        map.add(name, instance_variable_get('@' + name))
      end
    end
  end

  private

  # Open a YAML mapping for this object through YAML::quick_emit and
  # yield the mapping to the caller's block.
  def yaml_emit opts
    YAML::quick_emit(object_id, opts) do |out|
      out.map(taguri, to_yaml_style) { |map| yield map }
    end
  end
end
58
+
module RHNH
  # Mixin giving a collection a post_deserialize hook that forwards
  # to each of its elements.
  module EnumerablePostDeserializeHelper
    # Call YAML.call_post_deserialize on every element yielded by
    # each, skipping nil and false elements.
    def post_deserialize
      each do |element|
        next unless element
        YAML.call_post_deserialize(element)
      end
    end
  end
end

# Arrays forward post_deserialize to their elements.
class Array
  include RHNH::EnumerablePostDeserializeHelper
end

# Hashes forward post_deserialize to the entries yielded by each.
class Hash
  include RHNH::EnumerablePostDeserializeHelper
end
76
+
77
+
module YAML
  # Walk obj and the values of its instance variables (recursively),
  # calling post_deserialize on every object that responds to it.
  # Children are visited before their owner.
  #
  # object_map records the object_ids already visited so that shared
  # or cyclic references are processed only once.
  def YAML.call_post_deserialize obj, object_map = ::Set.new
    return if object_map.include?(obj.object_id)
    object_map.add(obj.object_id)

    obj.instance_variables.each do |v|
      call_post_deserialize obj.instance_variable_get(v), object_map
    end

    obj.post_deserialize if obj.respond_to?('post_deserialize')
  end

  class << self
    # Keep the YAML engine's own load reachable. The guard prevents
    # the alias from capturing the wrapper below if this file is
    # loaded more than once.
    unless method_defined?(:load_without_post_deserialize)
      alias_method :load_without_post_deserialize, :load
    end

    # Load a YAML document with the engine's original load, then run
    # the post_deserialize hooks on the result and return it.
    #
    # The previous implementation called `parser.load(io)`, which the
    # Psych engine does not provide. Extra arguments are forwarded
    # unchanged to the original load.
    def load(io, *args)
      yp = load_without_post_deserialize(io, *args)
      call_post_deserialize yp
      yp
    end
    # Forward keyword arguments correctly on Rubies that separate
    # them from positional arguments.
    ruby2_keywords(:load) if respond_to?(:ruby2_keywords, true)
  end
end
@@ -0,0 +1,19 @@
module Treat
  module Formatters
    module Unserializers
      # Delegates unserialization to the YAML or XML unserializer
      # based on the extension of the document's file.
      class Autoselect
        # Pick the unserializer from the extension of document.file
        # (matched case-insensitively): 'yaml'/'yml' => :yaml,
        # 'xml' => :xml.
        #
        # Returns the result of document.unserialize.
        # Raises RuntimeError for any other extension.
        def self.unserialize(document, options = {})
          ext = document.file.split('.')[-1].to_s.downcase
          case ext
          when 'yaml', 'yml'
            document.unserialize(:yaml)
          when 'xml'
            document.unserialize(:xml)
          else
            # The two halves of the message need a separating space.
            raise "File #{document.file} was not recognized " +
                  "as a supported serialized format."
          end
        end
      end
    end
  end
end
@@ -0,0 +1,79 @@
module Treat
  module Formatters
    module Unserializers
      # Rebuilds an entity tree from an XML file using Nokogiri's
      # streaming reader.
      class XML
        require 'nokogiri'

        # Read the XML in document.file node by node, rebuild the
        # entity tree it describes and append it to the document.
        #
        # Nodes whose name is listed in Treat::Entities.list become
        # entities; the text of any other node is assigned as the value
        # of the entity currently being built.
        #
        # Returns the document.
        def self.unserialize(document, options = {})
          # Read in the XML file.
          xml = File.read(document.file)
          xml_reader = Nokogiri::XML::Reader.from_memory(xml)
          current_element = nil
          previous_depth = 0

          # Read the XML file entity by entity.
          while xml_reader.read
            # The depth in the XML tree.
            current_depth = xml_reader.depth
            # If we are at the end of the children stack, pop up.
            if previous_depth > current_depth && current_depth != 0
              current_element = current_element.parent
            end
            # If an end element has been reached, record the depth so
            # that the pop happens on the next iteration.
            if xml_reader.node_type ==
               Nokogiri::XML::Reader::TYPE_END_ELEMENT
              previous_depth = current_depth
              next
            end

            id = nil
            attributes = {}
            edges = {}
            xml_reader.attributes.each_pair do |k, v|
              case k
              when 'id'
                id = v
              when 'edges'
                edges = v
              when 'value'
                # NOTE(review): a 'value' attribute is consumed here
                # but never applied to the entity, as in the original
                # code -- confirm whether it should be.
                nil
              else
                attributes[k.intern] = v
              end
            end

            type = xml_reader.name.intern

            if Treat::Entities.list.include?(type)
              revived = self.revive(type, '', id)
              # Relies on << returning the element that subsequent
              # children should attach to.
              current_element =
                current_element ? (current_element << revived) : revived
              current_element.features = attributes
              current_element.edges = edges
            else
              # Guard against nodes without a value and against text
              # or comments met before the first entity.
              current_value = xml_reader.value.to_s.strip
              if current_element && current_value != ''
                current_element.value = current_value
              end
            end

            previous_depth = current_depth
          end
          document << current_element
          document
        end

        # Instantiate the entity class named by type (looked up in
        # Treat::Entities through the cc helper) with the given value
        # and id.
        def self.revive(type, value, id)
          klass = Treat::Entities.const_get(cc(type))
          klass.new(value, id)
        end

      end
    end
  end
end