RubyGems - treat - Versions diffs - 0.1.2 → 0.1.3 - Mend

treat 0.1.2 → 0.1.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (100)

data/LICENSE +7 -8
data/TODO +16 -13
data/examples/keywords.rb +89 -1
data/lib/treat/buildable.rb +1 -8
data/lib/treat/categories.rb +3 -4
data/lib/treat/category.rb +1 -1
data/lib/treat/delegatable.rb +1 -1
data/lib/treat/detectors/encoding/native.rb +5 -0
data/lib/treat/detectors/encoding/r_chardet19.rb +2 -3
data/lib/treat/detectors/language/language_detector.rb +4 -0
data/lib/treat/detectors/language/what_language.rb +4 -4
data/lib/treat/detectors.rb +1 -1
data/lib/treat/entities/entity.rb +5 -3
data/lib/treat/entities/tokens.rb +14 -5
data/lib/treat/entities/zones.rb +4 -0
data/lib/treat/entities.rb +7 -5
data/lib/treat/extractors/keywords/topics_frequency.rb +40 -0
data/lib/treat/extractors/statistics/{frequency.rb → frequency_in.rb} +5 -4
data/lib/treat/extractors/statistics/frequency_of.rb +3 -5
data/lib/treat/extractors/statistics/{position_in.rb → position_in_parent.rb} +4 -3
data/lib/treat/extractors/statistics/tf_idf.rb +36 -0
data/lib/treat/extractors/statistics/transition_matrix.rb +25 -25
data/lib/treat/extractors/statistics/transition_probability.rb +8 -4
data/lib/treat/extractors/time/chronic.rb +8 -0
data/lib/treat/extractors/time/native.rb +6 -0
data/lib/treat/extractors/time/nickel.rb +31 -23
data/lib/treat/extractors/topic_words/lda.rb +21 -16
data/lib/treat/extractors/topics/reuters.rb +6 -4
data/lib/treat/extractors.rb +7 -7
data/lib/treat/formatters/readers/abw.rb +32 -0
data/lib/treat/formatters/readers/autoselect.rb +13 -11
data/lib/treat/formatters/readers/doc.rb +13 -0
data/lib/treat/formatters/readers/gocr.rb +2 -0
data/lib/treat/formatters/readers/html.rb +21 -1
data/lib/treat/formatters/readers/ocropus.rb +3 -3
data/lib/treat/formatters/readers/odt.rb +41 -0
data/lib/treat/formatters/readers/pdf.rb +5 -2
data/lib/treat/formatters/readers/txt.rb +2 -0
data/lib/treat/formatters/serializers/xml.rb +3 -2
data/lib/treat/formatters/serializers/yaml.rb +2 -0
data/lib/treat/formatters/unserializers/autoselect.rb +7 -1
data/lib/treat/formatters/unserializers/xml.rb +6 -1
data/lib/treat/formatters/unserializers/yaml.rb +5 -1
data/lib/treat/formatters/visualizers/dot.rb +35 -37
data/lib/treat/formatters/visualizers/html.rb +1 -0
data/lib/treat/formatters/visualizers/inspect.rb +4 -0
data/lib/treat/formatters/visualizers/short_value.rb +18 -3
data/lib/treat/formatters/visualizers/standoff.rb +11 -6
data/lib/treat/formatters/visualizers/tree.rb +5 -1
data/lib/treat/formatters/visualizers/txt.rb +6 -1
data/lib/treat/formatters.rb +1 -1
data/lib/treat/group.rb +4 -3
data/lib/treat/inflectors/cardinal_words/linguistics.rb +14 -17
data/lib/treat/inflectors/conjugations/linguistics.rb +16 -3
data/lib/treat/inflectors/declensions/linguistics.rb +17 -6
data/lib/treat/inflectors/ordinal_words/linguistics.rb +9 -10
data/lib/treat/inflectors/stem/porter.rb +6 -2
data/lib/treat/inflectors/stem/porter_c.rb +4 -1
data/lib/treat/inflectors/stem/uea.rb +4 -4
data/lib/treat/languages/english/tags.rb +16 -0
data/lib/treat/languages/english.rb +4 -1
data/lib/treat/lexicalizers/category/from_tag.rb +4 -4
data/lib/treat/lexicalizers/linkages/naive.rb +3 -3
data/lib/treat/lexicalizers/tag/brill.rb +3 -11
data/lib/treat/lexicalizers/tag/lingua.rb +4 -6
data/lib/treat/lexicalizers.rb +0 -2
data/lib/treat/processors/chunkers/txt.rb +4 -4
data/lib/treat/processors/parsers/enju.rb +3 -17
data/lib/treat/processors/parsers/stanford.rb +4 -0
data/lib/treat/processors/segmenters/punkt.rb +1 -0
data/lib/treat/processors/segmenters/stanford.rb +4 -0
data/lib/treat/processors/segmenters/tactful.rb +4 -1
data/lib/treat/processors/tokenizers/punkt.rb +1 -2
data/lib/treat/processors/tokenizers/stanford.rb +4 -0
data/lib/treat/processors/tokenizers/tactful.rb +1 -1
data/lib/treat/processors.rb +4 -4
data/lib/treat/proxies.rb +18 -11
data/lib/treat/registrable.rb +12 -5
data/lib/treat/sugar.rb +8 -3
data/lib/treat/tree.rb +10 -3
data/lib/treat.rb +55 -55
data/test/tc_entity.rb +7 -7
data/test/tc_extractors.rb +6 -4
data/test/tc_formatters.rb +0 -4
data/test/tests.rb +2 -0
data/test/texts.rb +4 -4
metadata +48 -56
data/examples/texts/bugged_out.txt +0 -26
data/examples/texts/half_cocked_basel.txt +0 -16
data/examples/texts/hedge_funds.txt +0 -24
data/examples/texts/hose_and_dry.txt +0 -19
data/examples/texts/hungarys_troubles.txt +0 -46
data/examples/texts/indias_slowdown.txt +0 -15
data/examples/texts/merkozy_rides_again.txt +0 -24
data/examples/texts/prada_is_not_walmart.txt +0 -9
data/examples/texts/republican_nomination.txt +0 -26
data/examples/texts/to_infinity_and_beyond.txt +0 -15
data/lib/treat/entities/text.rb +0 -7
data/lib/treat/extractors/key_sentences/topics_frequency.rb +0 -49
data/lib/treat/formatters/cleaners/html.rb +0 -17

data/lib/treat/extractors/statistics/transition_probability.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 module Treat
   module Extractors
     module Statistics
+      # Experimental algorithm to calculate the transition
+      # probability of an observed word.
       class TransitionProbability
         # Find the transition probability.
@@ -18,14 +20,16 @@ module Treat
               next unless tm[f1][v1]
               relationships.each do |relationship|
-                relatives = target.send(relationship)
+                relatives = entity.send(relationship)
                 relatives = [relatives] unless relatives.is_a? Array
                 relatives.each do |relative|
                   next if relative.nil? || !relative.has?(f2)
                   v2 = relative.send(f2)
-                  if tm[f1][v1][relationship][f2][v2]
-                    score += tm[f1][v1][relationship][f2][v2]
-                    count += 1
+                  if tm[f1][v1][relationship] &&
+                    tm[f1][v1][relationship][f2] &&
+                    tm[f1][v1][relationship][f2][v2]
+                      score += tm[f1][v1][relationship][f2][v2]
+                      count += 1
                   end
                 end
               end

data/lib/treat/extractors/time/chronic.rb CHANGED Viewed

@@ -1,8 +1,16 @@
 module Treat
   module Extractors
     module Time
+      # A wrapper for the 'chronic' gem, which parses
+      # time and date information.
+      #
+      # Project website: http://chronic.rubyforge.org/
       class Chronic
         silence_warnings { require 'chronic' }
+        # Return the time information contained within the entity
+        # by parsing it with the 'chronic' gem.
+        #
+        # Options: none.
         def self.time(entity, options = {})
           silence_warnings { ::Chronic.parse(entity.to_s, {:guess => true}) }
         end

data/lib/treat/extractors/time/native.rb CHANGED Viewed

@@ -1,8 +1,14 @@
 module Treat
   module Extractors
     module Time
+      # A wrapper for Ruby's native date/time parsing.
       module Native
         require 'date'
+        # Return a DateTime object representing the date/time
+        # contained within the entity, using Ruby's native
+        # date/time parser.
+        #
+        # Options: none.
         def self.time(entity, options = {})
           ::DateTime.parse(entity.to_s)
         end

data/lib/treat/extractors/time/nickel.rb CHANGED Viewed

@@ -1,45 +1,53 @@
 module Treat
   module Extractors
     module Time
-=begin
-      Annotations
-      Type examples
-      single "lunch with megan tomorrow at noon"
-      daily "Art exhibit until March 1st"
-      weekly "math class every wed from 8-11am"
-      daymonthly "open bar at joes the first friday of every month"
-      datemonthly "pay credit card bill on the 22nd of each month"
-=end
+      # A wrapper for the 'nickel' gem, which parses
+      # times and dates and supplies additional information
+      # concerning these. The additional information supplied
+      # that this class annotates entities with is:
+      #
+      # - time_recurrence: frequency of recurrence in words*.
+      # - time_recurrence_interval: frequency of recurrence in days.
+      # - start_time: a DateTime object representing the beginning of
+      #   an event.
+      # - end_time: a DateTime object representing the end of an event.
+      #
+      # Examples of values for time_recurrence are:
+      #
+      # - single: "lunch with megan tomorrow at noon"
+      # - daily: "Art exhibit until March 1st"
+      # - weekly: "math class every wed from 8-11am"
+      # - daymonthly: "open bar at joes the first friday of every month"
+      # - datemonthly: "pay credit card bill on the 22nd of each month"
+      #
+      # Project website: http://naturalinputs.com/
       module Nickel
         require 'date'
         silence_warnings { require 'nickel' }
+        # Extract time information from a bit of text.
         def self.time(entity, options = {})
           n = silence_warnings { ::Nickel.parse(entity.to_s) }
           occ = n.occurrences[0]
-          # Find the words..
           rec = occ.type.to_s.gsub('single', 'once').intern
           entity.set :time_recurrence, rec
-          interval = occ.interval ? occ.interval.intern : :none
+          interval = occ.interval ? occ.interval : :none
           entity.set :time_recurrence_interval, interval
           s = [occ.start_date, occ.start_time]
           ds = [s[0].year, s[0].month, s[0].day] if s[0]
-          ts = [s[1].hour, s[1].min, s[1].sec] if s[1]
+          #ts = [s[1].hour, s[1].min, s[1].sec] if s[1]
           e = [occ.end_date, occ.end_time]
           de = [e[0].year, e[0].month, e[0].day] if e[0]
-          te = [e[1].hour, e[1].min, e[1].sec] if e[1]
-          entity.set :start_time, ::DateTime.civil(*ds, *ts) if ds
-          entity.set :end_time, ::DateTime.civil(*de, *te) if de
+          #te = [e[1].hour, e[1].min, e[1].sec] if e[1]
+          entity.set :start_time, ::DateTime.civil(*ds) if ds
+          entity.set :end_time, ::DateTime.civil(*de) if de
           entity.start_time
         end
       end
     end
   end
 end

data/lib/treat/extractors/topic_words/lda.rb CHANGED Viewed

@@ -9,6 +9,8 @@ module Treat
       # Blei, David M., Ng, Andrew Y., and Jordan, Michael
       # I. 2003. Latent dirichlet allocation. Journal of
       # Machine Learning Research. 3 (Mar. 2003), 993-1022.
+      #
+      # Project website: https://github.com/ealdent/lda-ruby
       class LDA
         # Require the lda-ruby gem.
         silence_warnings { require 'lda-ruby' }
@@ -17,25 +19,28 @@ module Treat
         Lda::TextCorpus.class_eval do
           # Ruby, Y U NO SHUT UP!
           silence_warnings { undef :initialize }
-          # Redefine initialize to take in an array of texts.
-          def initialize(texts)
+          # Redefine initialize to take in an array of sections
+          def initialize(sections)
             super(nil)
-            texts.each do |text|
-              add_document(Lda::TextDocument.new(self, text))
+            sections.each do |section|
+              add_document(Lda::TextDocument.new(self, section))
             end
           end
         end
+        # Default options for the LDA algorithm.
+        DefaultOptions = {
+          topics: 20,
+          words_per_topic: 10,
+          iterations: 20
+        }
+        # Retrieve the topic words of a collection.
         def self.topic_words(collection, options = {})
-          # Set the options
-          options[:words_per_topic] ||= 10
-          options[:topics] ||= 20
-          options[:iterations] ||= 20
+          options = DefaultOptions.merge(options)
           # Create a corpus with the collection
-          texts = collection.texts.collect do |t|
+          sections = collection.sections.collect do |t|
             t.to_s.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => "?")
           end
-          corpus = Lda::TextCorpus.new(texts)
+          corpus = Lda::TextCorpus.new(sections)
           # Create an Lda object for training
           lda = Lda::Lda.new(corpus)
@@ -43,15 +48,15 @@ module Treat
           lda.max_iter = options[:iterations]
           # Run the EM algorithm using random starting points
           silence_streams(STDOUT, STDERR) { lda.em('random') }
           # Load the vocabulary.
           if options[:vocabulary]
             lda.load_vocabulary(options[:vocabulary])
           end
-          # Get the topic words and annotate the text.
+          # Get the topic words and annotate the section.
           topic_words = lda.top_words(options[:words_per_topic])
           topic_words.each do |i, words|
             collection.each_word do |word|
               if words.include?(word)
@@ -62,7 +67,7 @@ module Treat
               end
             end
           end
           topic_words
         end
       end

data/lib/treat/extractors/topics/reuters.rb CHANGED Viewed

@@ -6,9 +6,9 @@ module Treat
       #
       # Copyright 2005 Mark Watson.  All rights reserved.
       # This software is released under the GPL.
-      #
-      # Modifications for inclusion in Treat by
-      # Louis Mullie (2011).
+      # Rewrite for inclusion in Treat by Louis Mullie (2011).
+      #
+      # Original project website: http://www.markwatson.com/opensource/
       class Reuters
         # Require the Nokogiri XML parser.
         require 'nokogiri'
@@ -17,6 +17,8 @@ module Treat
         @@region = {}
         @@topics = {}
         # Get the topic of the text.
+        #
+        # Options: none.
         def self.topics(text, options = {})
           stems = []
           @@reduce = 0
@@ -33,7 +35,7 @@ module Treat
           topics = score_words(@@industry, stems)
           topics = topics.merge(score_words(@@region, stems))
           topics = topics.merge(score_words(@@topics, stems))
-          Treat::Feature.new(topics)
+          #Treat::Feature.new(topics)
         end
         # Read the topics from the XML files.
         def self.get_topics

data/lib/treat/extractors.rb CHANGED Viewed

@@ -6,19 +6,19 @@ module Treat
     module Time
       extend Group
       self.type = :annotator
-      self.targets = [:word, :constituent, :symbol]
+      self.targets = [:sentence, :word, :constituent, :symbol]
     end
     # Extract the topic from a text.
     module Topics
       extend Group
       self.type = :annotator
-      self.targets = [:collection, :document, :text, :zone, :sentence]
+      self.targets = [:collection, :document, :zone, :sentence]
     end
     # Extract the topic from a text.
     module TopicWords
       extend Group
       self.type = :annotator
-      self.targets = [:collection, :document, :text, :zone, :sentence]
+      self.targets = [:collection, :document, :zone, :sentence]
     end
     # Extract named entities from texts.
     module NamedEntity
@@ -27,15 +27,15 @@ module Treat
       self.targets = [:entity]
     end
     # Extract the key sentences from a text.
-    module KeySentences
+    module Keywords
       extend Group
-      self.type = :computer
-      self.targets = [:collection, :document, :text, :zone, :sentence]
+      self.type = :annotator
+      self.targets = [:collection, :document, :zone, :sentence]
     end
     # This module should be moved out of here ASAP.
     module Statistics
       extend Group
-      self.type = :computer
+      self.type = :annotator
       self.targets = [:entity]
       self.default = :none
     end

data/lib/treat/formatters/readers/abw.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Treat
+  module Formatters
+    module Readers
+      class Abw
+        require 'rexml/document'
+        require 'rexml/streamlistener'
+        def self.read(document, options = {})
+          xml_h = AbiWordXmlHandler.new(
+          REXML::Document.parse_stream((IO.read(document.file)), xml_h))
+          document << xml_h.plain_text
+          document
+        end
+        class AbiWordXmlHandler
+          include REXML::StreamListener
+          attr_reader :plain_text
+          def initialize
+            @plain_text = ""
+          end
+          def text s
+            begin
+              s = s.strip
+              if s.length > 0
+                @plain_text << s
+                @plain_text << "\n"
+              end
+            end if s != 'AbiWord' && s != 'application/x-abiword'
+          end
+        end
+      end
+    end
+  end
+end

data/lib/treat/formatters/readers/autoselect.rb CHANGED Viewed

@@ -6,26 +6,28 @@ module Treat
       # the appropriate reader based on the file
       # extension of the supplied document.
       class Autoselect
-        # A list of image extensions that should be routed
-        # to the Ocropus OCR engine.
+        # A list of image extensions that should be routed to OCR.
         ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']
+        # Default options.
+        DefaultOptions = {:ocr => :ocropus}
         # Select the appropriate reader based on the format
         # of the filename in document.
         #
         # Options:
-        # :ocr => :ocropus | :gocr (the OCR engine to use).
-        def self.read(document, options = {:ocr => :ocropus})
+        #
+        # - :ocr_engine => :ocropus or :gocr (the OCR engine to use).
+        def self.read(document, options)
+          options = DefaultOptions.merge(options)
           ext = document.file.split('.')[-1]
-          if ImageExtensions.include?(ext)
-            reader = 'ocropus'
-          else
-            reader = ext
-          end
+          reader = ImageExtensions.include?(ext) ? 'ocropus' : ext
+          reader = 'html' if reader == 'htm'
+          reader = 'yaml' if reader == 'yml'
           begin
             r = Treat::Formatters::Readers.const_get(cc(reader))
-          rescue NameError
+          rescue NameError => e
+            puts e.message
             raise Treat::Exception,
-            "Cannot find a default reader for format: '#{ext}'."
+            "Cannot find a reader for format: '#{ext}'."
           end
           document = r.read(document, options)
         end

data/lib/treat/formatters/readers/doc.rb ADDED Viewed

@@ -0,0 +1,13 @@
+module Treat
+  module Formatters
+    module Readers
+      class Doc
+        def self.read(document, options = {})
+          f = `antiword #{document.file}`
+          document << Treat::Entities::Entity.from_string(f)
+          document
+        end
+      end
+    end
+  end
+end

data/lib/treat/formatters/readers/gocr.rb CHANGED Viewed

@@ -10,6 +10,8 @@ module Treat
       # Project site: http://jocr.sourceforge.net
       class GOCR
         # Read a file using the GOCR reader.
+        #
+        # Options: none.
         def self.read(document, options = {})
           create_temp_file(:pgm) do |tmp|
             `convert #{document.file} #{tmp}`

data/lib/treat/formatters/readers/html.rb CHANGED Viewed

@@ -1,11 +1,31 @@
 module Treat
   module Formatters
     module Readers
+      # A temporary HTML reader; simply strips the
+      # document of all of its markup.
       class HTML
+        # Require Hpricot.
+        silence_warnings { require 'hpricot' }
+        # By default, backup the HTML text while cleaning.
+        DefaultOptions = { clean: true, backup: false }
+        # Read the HTML document and strip it of its markup.
+        #
+        # Options:
+        #
+        # - (Boolean) :clean => whether to strip HTML markup.
+        # - (Boolean) :backup => whether to backup the HTML
+        #   markup while cleaning.
         def self.read(document, options = {})
+          options = DefaultOptions.merge(options)
           f = File.read(document.file)
           document << Treat::Entities::Entity.from_string(f)
-          document.clean(:html)
+          if options[:clean]
+            document.each do |section|
+              section.set :html_value, section.value if options[:backup]
+              section.value = Hpricot(section.value).inner_text
+            end
+          end
+          document
         end
       end
     end

data/lib/treat/formatters/readers/ocropus.rb CHANGED Viewed

@@ -15,11 +15,11 @@ module Treat
       # DFKI and U. Kaiserslautern, Germany.
       class Ocropus
         #  Read a file using the Google Ocropus reader.
+        #
+        # Options: none.
         def self.read(document, options = {})
           create_temp_file(:txt) do |tmp|
-            capture(:stderr) do
-              `ocropus page #{document.file} > #{tmp} -STDIO 2>/dev/null`
-            end
+            `ocropus page #{document.file} > #{tmp} -STDIO 2>/dev/null`
             f = File.read(tmp)
             document << Treat::Entities::Entity.from_string(f)
           end

data/lib/treat/formatters/readers/odt.rb ADDED Viewed

@@ -0,0 +1,41 @@
+module Treat
+  module Formatters
+    module Readers
+      class Odt
+        # Build an entity from a string in plain text format.
+        def self.read(document, options = {})
+          f = File.read(document.file)
+          f = f.force_encoding("UTF-8")
+          xml_h = OOXmlHandler.new(
+            REXML::Document.parse_stream(f, xml_h)
+          )
+          document << xml_h.plain_text
+          document
+        end
+        class OOXmlHandler
+          require 'rexml/document'
+          require 'rexml/streamlistener'
+          include REXML::StreamListener
+          attr_reader :plain_text
+          def initialize
+            @plain_text = ""
+          end
+          def tag_start(name, attrs)
+            @last_name = name
+          end
+          def text(s)
+            if @last_name.index('text')
+              s = s.strip
+              if s.length > 0
+                @plain_text << s
+                @plain_text << "\n"
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/treat/formatters/readers/pdf.rb CHANGED Viewed

@@ -1,9 +1,12 @@
 module Treat
   module Formatters
     module Readers
+      # A wrapper for the Poppler pdf2text utility, which
+      # extracts the text from a PDF file.
       class PDF
-        require 'fileutils'
-        # Read a file using the Poppler pdf2text utility.
+        # Read a PDF file using the Poppler pdf2text utility.
+        #
+        # Options: none.
         def self.read(document, options = {})
           create_temp_file(:txt) do |tmp|
             `pdftotext #{document.file} #{tmp} `.strip

data/lib/treat/formatters/readers/txt.rb CHANGED Viewed

@@ -4,6 +4,8 @@ module Treat
       # This class simply reads a plain text file.
       class Txt
         # Build an entity from a string in plain text format.
+        #
+        # Options: none.
         def self.read(document, options = {})
           f = File.read(document.file)
           document << Treat::Entities::Entity.from_string(f)

data/lib/treat/formatters/serializers/xml.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Treat
   module Formatters
     module Serializers
-      # This class converts an entity to XML format.
+      # This class converts an entity to a storable XML format.
       class XML
         # Reauire the Nokogiri XML parser.
         require 'nokogiri'
@@ -9,7 +9,8 @@ module Treat
         def self.serialize(entity, options = {})
           options = {:indent => 0} if options[:indent].nil?
           if options[:indent] == 0
-            string = '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>'
+            enc = entity.encoding(:r_chardet19).to_s.gsub('_', '-').upcase
+            string = "<?xml version=\"1.0\" encoding=\"#{enc}\" standalone=\"no\" ?>"
           else
             string = ''
           end

data/lib/treat/formatters/serializers/yaml.rb CHANGED Viewed

@@ -6,6 +6,8 @@ module Treat
       # This class serializes entities in YAML format.
       class YAML
         # Serialize an entity in YAML format.
+        #
+        # Options: none.
         def self.serialize(entity, options = {})
           ::Psych.dump(entity)
         end

data/lib/treat/formatters/unserializers/autoselect.rb CHANGED Viewed

@@ -1,7 +1,13 @@
 module Treat
   module Formatters
     module Unserializers
+      # This class doesn't perform any unserializing;
+      # it simply routes the document to an unserializer
+      # based on the file extension of the document.
       class Autoselect
+        # Unserialize any supported file format.
+        #
+        # Options: none.
         def self.unserialize(document, options = {})
           ext = document.file.split('.')[-1]
           if ext == 'yaml' || ext == 'yml'
@@ -9,7 +15,7 @@ module Treat
           elsif ext == 'xml'
             document.unserialize(:xml)
           else
-            raise "File #{document.file} was not recognized"+
+            raise "File #{document.file} was not recognized "+
             "as a supported serialized format."
           end
         end

data/lib/treat/formatters/unserializers/xml.rb CHANGED Viewed

@@ -1,9 +1,13 @@
 module Treat
   module Formatters
     module Unserializers
+      # Recreates the entity tree corresponding to
+      # a serialized XML file.
       class XML
         require 'nokogiri'
+        # Unserialize an entity stored in XML format.
+        #
+        # Options: none.
         def self.unserialize(document, options = {})
           # Read in the XML file.
           xml = File.read(document.file)
@@ -59,6 +63,7 @@ module Treat
               current_value = xml_reader.value.strip
               if current_value && current_value != ''
                 current_element.value = current_value
+                current_element.register_token(current_element)
               end
             end

data/lib/treat/formatters/unserializers/yaml.rb CHANGED Viewed

@@ -1,10 +1,14 @@
 module Treat
   module Formatters
     module Unserializers
+      # This class is a wrapper for the Psych YAML
+      # parser; it unserializes YAML files.
       class YAML
         # Require the Psych YAML parser.
         require 'psych'
-        # Unserialize a YAML file representing an entity.
+        # Unserialize a YAML file.
+        #
+        # Options: none.
         def self.unserialize(document, options = {})
           document << ::Psych.load(File.read(document.file))
           document