treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111

data/lib/treat/formatters/readers/pdf.rb
@@ -1,28 +1,31 @@
  # encoding: utf-8
- module Treat
- module Formatters
- module Readers
- # A wrapper for the Poppler pdf2text utility, which
- # extracts the text from a PDF file.
- class PDF
- # Read a PDF file using the Poppler pdf2text utility.
- #
- # Options: none.
- def self.read(document, options = {})
- create_temp_file(:txt) do |tmp|
- `pdftotext #{document.file} #{tmp} `.strip
- f = File.read(tmp)
- f.gsub!("\t\r ", '')
- f.gsub!('-­‐', '-')
- f.gsub!("\n\n", '#keep#')
- f.gsub!("\n", ' ')
- f.gsub!(" ", ' ')
- f.gsub!('#keep#', "\n\n")
- document << Treat::Entities::Entity.from_string(f)
- end
- document
- end
- end
+ # A wrapper for the Poppler pdf2text utility, which
+ # extracts the text from a PDF file.
+ module Treat::Formatters::Readers::PDF
+
+ # Read a PDF file using the Poppler pdf2text utility.
+ #
+ # Options: none.
+ def self.read(document, options = {})
+
+ create_temp_file(:txt) do |tmp|
+
+ `pdftotext #{document.file} #{tmp} `.strip
+ f = File.read(tmp)
+ f.gsub!("\t\r ", '')
+ f.gsub!('-­‐', '-')
+ f.gsub!("\n\n", '#keep#')
+ f.gsub!("\n", ' ')
+ # Fix for an incompatible space character.
+ f.gsub!(" ", ' ')
+ f.gsub!('#keep#', "\n\n")
+
+ document.value = f
+ document.set :format, :pdf
+ document
+
  end
+
  end
- end
+
+ end
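
For reference, a minimal standalone sketch of the conversion the new reader performs: shell out to Poppler's pdftotext, then run the same newline-normalization pass shown in the diff. The pdf_to_text helper below is hypothetical (not part of Treat) and assumes the pdftotext binary is on the PATH.

require 'tempfile'

# Hypothetical helper mirroring the reader's cleanup pass; not Treat API.
def pdf_to_text(path)
  Tempfile.create(['treat-pdf', '.txt']) do |tmp|
    `pdftotext #{path} #{tmp.path}`      # Poppler does the extraction
    text = File.read(tmp.path)
    text.gsub!("\n\n", '#keep#')         # protect paragraph breaks
    text.gsub!("\n", ' ')                # join hard-wrapped lines
    text.gsub!('#keep#', "\n\n")         # restore paragraph breaks
    text
  end
end

puts pdf_to_text('sample.pdf') if File.exist?('sample.pdf')
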
data/lib/treat/formatters/readers/txt.rb
@@ -1,17 +1,14 @@
- module Treat
- module Formatters
- module Readers
- # This class simply reads a plain text file.
- class Txt
- # Build an entity from a string in plain text format.
- #
- # Options: none.
- def self.read(document, options = {})
- f = File.read(document.file)
- document << Treat::Entities::Entity.from_string(f)
- document
- end
- end
- end
+ # This class simply reads a plain text file.
+ class Treat::Formatters::Readers::TXT
+
+ # Build an entity from a string
+ # in plain text format.
+ #
+ # Options: none.
+ def self.read(document, options = {})
+ document.value = File.read(document.file)
+ document.set :format, :txt
+ document
  end
+
  end
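
The TXT reader is the simplest illustration of the 1.0.0 reader contract: take an object exposing #file, assign the raw text to #value= and record the format with #set. A rough sketch with a hypothetical stand-in object (not Treat's actual Document entity):

# Hypothetical duck-typed document used only to illustrate the contract.
Doc = Struct.new(:file, :value, :features) do
  def set(feature, value)
    (self.features ||= {})[feature] = value
  end
end

doc = Doc.new('notes.txt', nil, {})
doc.value = File.read(doc.file)   # what Readers::TXT.read does in 1.0.0
doc.set(:format, :txt)
puts doc.features.inspect         # => {:format=>:txt}
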
data/lib/treat/formatters/readers/xml.rb
@@ -1,40 +1,77 @@
- module Treat
- module Formatters
- module Readers
- class XML
- require 'stanford-core-nlp'
- require 'cgi'
- # By default, backup the XML text while cleaning.
- DefaultOptions = { :clean => true, :backup => false }
- @@xml_cleaner = nil
- # Read the XML document and strip it of its markup.
- # Also splits the text into sentences and tokenizes it?
- #
- # Options:
- #
- # - (Boolean) :clean => whether to strip XML markup.
- # - (Boolean) :backup => whether to backup the XML
- # markup while cleaning.
- def self.read(document, options = {})
- options = DefaultOptions.merge(options)
- document << Treat::Entities::Entity.from_string(File.read(document.file))
- if options[:clean]
- @@xml_cleaner ||= StanfordCoreNLP.load(:tokenize, :ssplit, :cleanxml)
- document.each do |zone|
- text = StanfordCoreNLP::Text.new(zone.to_s)
- @@xml_cleaner.annotate(text)
- sentences = []
- text.get(:sentences) do |sentence|
- sentences << Treat::Entities::Sentence.from_string(sentence.to_s)
- end
- val = sentences.join(' ')
- zone.set :xml_value, CGI.escapeHTML(text.to_s) if options[:backup]
- zone.value = val
- end
- end
- document
+ class Treat::Formatters::Readers::XML
+
+ require 'treat/loaders/stanford'
+ require 'cgi'
+
+ # By default, don't backup the XML
+ # document while cleaning it.
+ DefaultOptions = {
+ :keep_html => false
+ }
+
+ # Hold one instance of the XML cleaner.
+ @@xml_reader = nil
+
+ # Read the XML document and strip it of its markup.
+ # Also segments and tokenizes the text.
+ #
+ # Options:
+ #
+ # - (Boolean) :keep_xml => whether to backup the XML
+ # markup while cleaning.
+ def self.read(document, options = {})
+
+ raise 'Not implemented.'
+
+ options = DefaultOptions.merge(options)
+
+ xml = File.read(document.file)
+
+ @@xml_reader ||= StanfordCoreNLP.load(
+ :tokenize, :ssplit, :cleanxml)
+
+ text = StanfordCoreNLP::Text.new(xml)
+ @@xml_reader.annotate(text)
+
+ text.get(:sentences).each do |sentence|
+
+ s = Treat::Entities::Sentence.
+ from_string(sentence.to_s, true)
+
+ sentence.get(:tokens).each do |token|
+ val = token.value.to_s.strip.gsub('\/', '/')
+ next if val =~ /^<[^>]+>$/
+
+ t = Treat::Entities::Token.
+ from_string(val)
+ c = token.get(:xml_context)
+
+ if c
+ context = []
+ c.each { |tag| context << tag.to_s }
+ t.set :xml_context, context
  end
+
+ s << t
+
  end
+
+ if Treat::Entities::Zone.from_string('')
+ section << s
+ end
+
+ if options[:backup]
+ document.set :xml_value,
+ CGI.escapeHTML(text.to_s)
+ end
+
+ document.value = ''
+
  end
+
+ document.set :format, :xml
+ document
+
  end
- end
+
+ end
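
Note that the rewritten reader raises 'Not implemented.' at the top of read, so the Stanford-based cleaning path below it is effectively disabled in 1.0.0. The one self-contained piece of logic worth calling out is the token filter, which skips tokens whose entire value is a bare XML tag; a small sketch of that check in isolation:

# Tokens whose whole value is an XML tag (e.g. "<p>") are skipped.
tokens = ['<doc>', 'Euler', 'was', 'born', 'in', 'Basel', '.', '</doc>']
kept = tokens.reject { |t| t =~ /^<[^>]+>$/ }
p kept   # => ["Euler", "was", "born", "in", "Basel", "."]
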
data/lib/treat/formatters/serializers/xml.rb
@@ -1,85 +1,86 @@
- module Treat
- module Formatters
- module Serializers
- # This class converts an entity to a storable XML format.
- class XML
- # Reauire the Nokogiri XML parser.
- require 'nokogiri'
- # Serialize an entity tree in XML format.
- #
- # Options:
- # - (String) :file => a file to write to.
- def self.serialize(entity, options = {})
- options = options.merge({:indent => 0}) if options[:indent].nil?
- indent = options[:indent]
- if options[:indent] == 0
- enc = entity.to_s.encoding.to_s.downcase
- string = "<?xml version=\"1.0\" encoding=\"#{enc}\" standalone=\"no\" ?>\n<treat>"
- else
- string = ''
- end
- spaces = ''
- options[:indent].times { spaces << ' ' }
- attributes = " id='#{entity.id}' "
- if !entity.features.nil? && entity.features.size != 0
- attributes << ' '
- entity.features.each_pair do |feature, value|
- if value.is_a? Entities::Entity
- attributes << "#{feature}='#{value.id}' "
- else
- attributes << "#{feature}='#{escape(value)}' "
- end
- end
- attributes << "dependencies='"
- a = []
- entity.dependencies.each do |dependency|
- a << ("{target: #{dependency.target}, type: #{dependency.type}, " +
- "directed: #{dependency.directed}, " +
- "direction: #{dependency.direction}}" )
- end
- # Structs.
- attributes << a.join('--') + "'"
- end
- tag = entity.class.to_s.split('::')[-1].downcase
- unless entity.is_a?(Treat::Entities::Token)
- string += "\n"
- end
- string += "#{spaces}<#{tag}#{attributes}>"
- if entity.has_children?
- options[:indent] += 1
- entity.children.each do |child|
- string =
- string +
- serialize(child, options)
- end
- options[:indent] -= 1
- else
- string = string + "#{escape(entity.value)}"
- end
- unless entity.is_a?(Treat::Entities::Token)
- string += "\n#{spaces}"
- end
- string += "</#{tag}>\n"
- if indent == 0
- string += "\n</treat>"
- if options[:file]
- File.open(options[:file], 'w') { |f| f.write(string) }
- end
- # puts string
- end
- string
+ # This class converts an entity to a storable XML format.
+ class Treat::Formatters::Serializers::XML
+
+ # Reauire the Nokogiri XML parser.
+ require 'nokogiri'
+ # Serialize an entity tree in XML format.
+ #
+ # Options:
+ # - (String) :file => a file to write to.
+ def self.serialize(entity, options = {})
+
+ options = options.merge({:indent => 0}) if options[:indent].nil?
+ indent = options[:indent]
+ if options[:indent] == 0
+ enc = entity.to_s.encoding.to_s.downcase
+ string = "<?xml version=\"1.0\" encoding=\"#{enc}\" standalone=\"no\" ?>\n<treat>\n"
+ else
+ string = ''
+ end
+ spaces = ''
+ options[:indent].times { spaces << ' ' }
+ attributes = " id='#{entity.id}'"
+ if !entity.features.nil? && entity.features.size != 0
+ attributes << ' '
+ entity.features.each_pair do |feature, value|
+ if value.is_a? Treat::Entities::Entity
+ attributes << "#{feature}='#{value.id}' "
+ else
+ attributes << "#{feature}='#{escape(value)}' "
+ end
+ end
+ unless entity.dependencies.empty?
+ attributes << "dependencies='"
+ a = []
+ entity.dependencies.each do |dependency|
+ a << ("{target: #{dependency.target}, type: #{dependency.type}, " +
+ "directed: #{dependency.directed}, " +
+ "direction: #{dependency.direction}}" )
  end
-
- def self.escape(input)
- result = input.to_s.dup
- result.gsub!("&", "&amp;")
- result.gsub!("<", "&lt;")
- result.gsub!(">", "&gt;")
- result.gsub!("'", "&apos;")
- result.gsub!("\"", "&quot;")
- result
+ # Structs.
+ attributes << a.join(',') + "'"
+ end
+ end
+ tag = entity.class.to_s.split('::')[-1].downcase
+ string += "#{spaces}<#{tag}#{attributes}>"
+ unless entity.is_a?(Treat::Entities::Token)
+ string += "\n"
+ end
+ if entity.has_children?
+ options[:indent] += 1
+ entity.children.each do |child|
+ string =
+ string +
+ serialize(child, options)
+ end
+ options[:indent] -= 1
+ else
+ string = string + "#{escape(entity.value)}"
+ end
+ unless entity.is_a?(Treat::Entities::Token)
+ string += "#{spaces}"
+ end
+ string += "</#{tag}>\n"
+ if indent == 0
+ string += "\n</treat>"
+ if options[:file]
+ File.open(options[:file], 'w') do |f|
+ f.write(string)
  end
  end
+ # puts string
  end
+ string
+ end
+
+ def self.escape(input)
+ result = input.to_s.dup
+ result.gsub!("&", "&amp;")
+ result.gsub!("<", "&lt;")
+ result.gsub!(">", "&gt;")
+ result.gsub!("'", "&apos;")
+ result.gsub!("\"", "&quot;")
+ result
  end
+
  end
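
Two behavioural details are easy to miss in this hunk: dependency structs are now joined with ',' instead of '--', and the escape helper is carried over unchanged. That helper is restated below as a plain standalone method (same substitutions as in the diff) to show its effect on a sample string:

def escape(input)
  result = input.to_s.dup
  result.gsub!("&", "&amp;")    # must run first, before the other entities
  result.gsub!("<", "&lt;")
  result.gsub!(">", "&gt;")
  result.gsub!("'", "&apos;")
  result.gsub!("\"", "&quot;")
  result
end

puts escape(%q{Gauss & "non-Euclidean" <geometry>})
# => Gauss &amp; &quot;non-Euclidean&quot; &lt;geometry&gt;
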
data/lib/treat/formatters/serializers/yaml.rb
@@ -1,22 +1,23 @@
- module Treat
- module Formatters
- module Serializers
- # Require the Psych YAML serializer.
- require 'psych'
- # This class serializes entities in YAML format.
- class YAML
- # Serialize an entity in YAML format.
- #
- # Options:
- # - (String) :file => a file to write to.
- def self.serialize(entity, options = {})
- yaml = ::Psych.dump(entity)
- if options[:file]
- File.open(options[:file], 'w') { |f| f.write(yaml) }
- end
- yaml
- end
+ # This class serializes entities in YAML format.
+ class Treat::Formatters::Serializers::YAML
+
+ silence_warnings do
+ # Require the Psych YAML serializer.
+ require 'psych'
+ end
+
+ # Serialize an entity in YAML format.
+ #
+ # Options:
+ # - (String) :file => a file to write to.
+ def self.serialize(entity, options = {})
+ yaml = ::Psych.dump(entity)
+ if options[:file]
+ File.open(options[:file], 'w') do |f|
+ f.write(yaml)
  end
  end
+ yaml
  end
+
  end
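
The silence_warnings wrapper is presumably provided by Treat's kernel extensions (see data/lib/treat/kernel.rb in the file list) rather than the Ruby standard library. The serialization itself is plain Psych; a minimal sketch of the same dump-then-write flow, with an ordinary hash standing in for an entity:

require 'psych'

# Plain-Ruby sketch of the serializer's flow; a hash stands in for an entity.
record = { 'value' => 'Colorless green ideas sleep furiously.', 'format' => 'txt' }
yaml = Psych.dump(record)

File.open('entity.yml', 'w') { |f| f.write(yaml) }
puts yaml
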
data/lib/treat/formatters/unserializers/autoselect.rb
@@ -1,25 +1,15 @@
- module Treat
- module Formatters
- module Unserializers
- # This class doesn't perform any unserializing;
- # it simply routes the document to an unserializer
- # based on the file extension of the document.
- class Autoselect
- # Unserialize any supported file format.
- #
- # Options: none.
- def self.unserialize(document, options = {})
- ext = document.file.split('.')[-1]
- if ext == 'yaml' || ext == 'yml'
- document.unserialize(:yaml)
- elsif ext == 'xml'
- document.unserialize(:xml)
- else
- raise "File #{document.file} was not recognized "+
- "as a supported serialized format."
- end
- end
- end
+ class Treat::Formatters::Unserializers::Autoselect
+
+ def self.unserialize(document, options = {})
+ file = document.file
+ if file.index('yml') || file.index('yaml')
+ document.unserialize(:yaml, options)
+ elsif file.index('xml')
+ document.unserialize(:xml, options)
+ else
+ raise Treat::Exception,
+ "Unreadable serialized format for file #{file}."
  end
  end
+
  end
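
The routing rule also changed shape: 0.2.5 compared the file extension exactly, while 1.0.0 uses a substring match on the whole file name (String#index), so a name like 'backup.xml.old' would still route to the XML unserializer. A sketch of the 1.0.0 decision in isolation, as a hypothetical helper:

def serialized_format(file)
  if file.index('yml') || file.index('yaml')
    :yaml
  elsif file.index('xml')
    :xml
  else
    raise "Unreadable serialized format for file #{file}."
  end
end

p serialized_format('corpus.yml')   # => :yaml
p serialized_format('corpus.xml')   # => :xml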