treat 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. data/LICENSE +4 -4
  2. data/TODO +21 -54
  3. data/lib/economist/half_cocked_basel.txt +16 -0
  4. data/lib/economist/hose_and_dry.doc +0 -0
  5. data/lib/economist/hungarys_troubles.abw +70 -0
  6. data/lib/economist/republican_nomination.pdf +0 -0
  7. data/lib/economist/saving_the_euro.odt +0 -0
  8. data/lib/economist/to_infinity_and_beyond.txt +15 -0
  9. data/lib/economist/zero_sum.html +91 -0
  10. data/lib/treat.rb +58 -72
  11. data/lib/treat/buildable.rb +59 -15
  12. data/lib/treat/categories.rb +26 -14
  13. data/lib/treat/category.rb +2 -2
  14. data/lib/treat/delegatable.rb +65 -48
  15. data/lib/treat/doable.rb +44 -0
  16. data/lib/treat/entities.rb +34 -14
  17. data/lib/treat/entities/collection.rb +2 -0
  18. data/lib/treat/entities/document.rb +3 -2
  19. data/lib/treat/entities/entity.rb +105 -90
  20. data/lib/treat/entities/phrases.rb +17 -0
  21. data/lib/treat/entities/tokens.rb +28 -13
  22. data/lib/treat/entities/zones.rb +20 -0
  23. data/lib/treat/extractors.rb +49 -11
  24. data/lib/treat/extractors/coreferences/stanford.rb +68 -0
  25. data/lib/treat/extractors/date/chronic.rb +32 -0
  26. data/lib/treat/extractors/date/ruby.rb +25 -0
  27. data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
  28. data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
  29. data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
  30. data/lib/treat/extractors/language/what_language.rb +49 -0
  31. data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
  32. data/lib/treat/extractors/roles/naive.rb +73 -0
  33. data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
  34. data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
  35. data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
  36. data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
  37. data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
  38. data/lib/treat/extractors/time/nickel.rb +30 -12
  39. data/lib/treat/extractors/topic_words/lda.rb +9 -9
  40. data/lib/treat/extractors/topics/reuters.rb +14 -15
  41. data/lib/treat/extractors/topics/reuters/region.xml +1 -0
  42. data/lib/treat/features.rb +7 -0
  43. data/lib/treat/formatters/readers/abw.rb +6 -1
  44. data/lib/treat/formatters/readers/autoselect.rb +5 -6
  45. data/lib/treat/formatters/readers/doc.rb +3 -1
  46. data/lib/treat/formatters/readers/html.rb +1 -1
  47. data/lib/treat/formatters/readers/image.rb +43 -0
  48. data/lib/treat/formatters/readers/odt.rb +1 -2
  49. data/lib/treat/formatters/readers/pdf.rb +9 -1
  50. data/lib/treat/formatters/readers/xml.rb +40 -0
  51. data/lib/treat/formatters/serializers/xml.rb +50 -14
  52. data/lib/treat/formatters/serializers/yaml.rb +7 -2
  53. data/lib/treat/formatters/unserializers/xml.rb +33 -7
  54. data/lib/treat/formatters/visualizers/dot.rb +90 -20
  55. data/lib/treat/formatters/visualizers/short_value.rb +2 -2
  56. data/lib/treat/formatters/visualizers/standoff.rb +2 -2
  57. data/lib/treat/formatters/visualizers/tree.rb +1 -1
  58. data/lib/treat/formatters/visualizers/txt.rb +13 -4
  59. data/lib/treat/group.rb +16 -10
  60. data/lib/treat/helpers/linguistics_loader.rb +18 -0
  61. data/lib/treat/inflectors.rb +10 -0
  62. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  63. data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
  64. data/lib/treat/inflectors/declensions/english.rb +319 -0
  65. data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
  66. data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
  67. data/lib/treat/install.rb +59 -0
  68. data/lib/treat/kernel.rb +18 -8
  69. data/lib/treat/languages.rb +18 -11
  70. data/lib/treat/languages/arabic.rb +4 -2
  71. data/lib/treat/languages/chinese.rb +6 -2
  72. data/lib/treat/languages/dutch.rb +16 -0
  73. data/lib/treat/languages/english.rb +47 -19
  74. data/lib/treat/languages/french.rb +8 -5
  75. data/lib/treat/languages/german.rb +9 -6
  76. data/lib/treat/languages/greek.rb +16 -0
  77. data/lib/treat/languages/italian.rb +6 -3
  78. data/lib/treat/languages/polish.rb +16 -0
  79. data/lib/treat/languages/portuguese.rb +16 -0
  80. data/lib/treat/languages/russian.rb +16 -0
  81. data/lib/treat/languages/spanish.rb +16 -0
  82. data/lib/treat/languages/swedish.rb +16 -0
  83. data/lib/treat/languages/tags.rb +377 -0
  84. data/lib/treat/lexicalizers.rb +34 -23
  85. data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
  86. data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
  87. data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
  88. data/lib/treat/lexicalizers/tag/brill.rb +35 -40
  89. data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
  90. data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
  91. data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
  92. data/lib/treat/processors.rb +8 -8
  93. data/lib/treat/processors/chunkers/txt.rb +4 -4
  94. data/lib/treat/processors/parsers/enju.rb +114 -99
  95. data/lib/treat/processors/parsers/stanford.rb +109 -41
  96. data/lib/treat/processors/segmenters/punkt.rb +17 -18
  97. data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
  98. data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
  99. data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
  100. data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
  101. data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
  102. data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
  103. data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
  104. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
  105. data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
  106. data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
  107. data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
  108. data/lib/treat/processors/segmenters/stanford.rb +38 -37
  109. data/lib/treat/processors/segmenters/tactful.rb +5 -4
  110. data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
  111. data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
  112. data/lib/treat/processors/tokenizers/perl.rb +2 -2
  113. data/lib/treat/processors/tokenizers/punkt.rb +6 -2
  114. data/lib/treat/processors/tokenizers/stanford.rb +25 -24
  115. data/lib/treat/processors/tokenizers/tactful.rb +1 -2
  116. data/lib/treat/proxies.rb +2 -35
  117. data/lib/treat/registrable.rb +17 -22
  118. data/lib/treat/sugar.rb +11 -11
  119. data/lib/treat/tree.rb +27 -17
  120. data/lib/treat/viewable.rb +29 -0
  121. data/lib/treat/visitable.rb +1 -1
  122. data/test/tc_entity.rb +56 -49
  123. data/test/tc_extractors.rb +41 -18
  124. data/test/tc_formatters.rb +7 -8
  125. data/test/tc_inflectors.rb +19 -24
  126. data/test/tc_lexicalizers.rb +12 -19
  127. data/test/tc_processors.rb +26 -12
  128. data/test/tc_resources.rb +2 -7
  129. data/test/tc_treat.rb +20 -22
  130. data/test/tc_tree.rb +4 -4
  131. data/test/tests.rb +3 -5
  132. data/test/texts.rb +13 -14
  133. data/tmp/INFO +1 -0
  134. metadata +78 -158
  135. data/bin/INFO +0 -1
  136. data/examples/benchmark.rb +0 -81
  137. data/examples/keywords.rb +0 -148
  138. data/lib/treat/detectors.rb +0 -31
  139. data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
  140. data/lib/treat/detectors/format/file.rb +0 -36
  141. data/lib/treat/detectors/language/what_language.rb +0 -29
  142. data/lib/treat/entities/constituents.rb +0 -15
  143. data/lib/treat/entities/sentence.rb +0 -8
  144. data/lib/treat/extractors/named_entity/abner.rb +0 -20
  145. data/lib/treat/extractors/named_entity/stanford.rb +0 -174
  146. data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
  147. data/lib/treat/extractors/time/chronic.rb +0 -20
  148. data/lib/treat/extractors/time/native.rb +0 -18
  149. data/lib/treat/formatters/readers/gocr.rb +0 -26
  150. data/lib/treat/formatters/readers/ocropus.rb +0 -31
  151. data/lib/treat/formatters/visualizers/html.rb +0 -13
  152. data/lib/treat/formatters/visualizers/inspect.rb +0 -20
  153. data/lib/treat/inflectors/declensions/en.rb +0 -18
  154. data/lib/treat/languages/categories.rb +0 -5
  155. data/lib/treat/languages/english/categories.rb +0 -23
  156. data/lib/treat/languages/english/tags.rb +0 -352
  157. data/lib/treat/languages/xinhua.rb +0 -12
  158. data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
  159. data/lib/treat/string.rb +0 -5
  160. data/test/tc_detectors.rb +0 -26
@@ -4,8 +4,10 @@ module Treat
4
4
  class Doc
5
5
  def self.read(document, options = {})
6
6
  f = `antiword #{document.file}`
7
+ f.gsub!("\n\n", '#keep#')
8
+ f.gsub!("\n", ' ')
9
+ f.gsub!('#keep#', "\n\n")
7
10
  document << Treat::Entities::Entity.from_string(f)
8
- document
9
11
  end
10
12
  end
11
13
  end
@@ -7,7 +7,7 @@ module Treat
7
7
  # Require Hpricot.
8
8
  silence_warnings { require 'hpricot' }
9
9
  # By default, clean the HTML markup without backing it up.
10
- DefaultOptions = { clean: true, backup: false }
10
+ DefaultOptions = { :clean => true, :backup => false }
11
11
  # Read the HTML document and strip it of its markup.
12
12
  #
13
13
  # Options:
@@ -0,0 +1,43 @@
1
+ module Treat
2
+ module Formatters
3
+ module Readers
4
+ # This class is a wrapper for the Google Ocropus
5
+ # optical character recognition (OCR) engine.
6
+ #
7
+ # "OCRopus(tm) is a state-of-the-art document
8
+ # analysis and OCR system, featuring pluggable
9
+ # layout analysis, pluggable character recognition,
10
+ # statistical natural language modeling, and multi-
11
+ # lingual capabilities."
12
+ #
13
+ # Original paper:
14
+ # Breuel, Thomas M. The Ocropus Open Source OCR System.
15
+ # DFKI and U. Kaiserslautern, Germany.
16
+ class Image
17
+ # Read a file using the Google Ocropus reader.
18
+ #
19
+ # Options:
20
+ # - (Boolean) :silent => whether to silence Ocropus.
21
+ def self.read(document, options = {})
22
+ read = lambda do |doc|
23
+ create_temp_dir do |tmp|
24
+ `ocropus book2pages #{tmp}/out #{doc.file}`
25
+ `ocropus pages2lines #{tmp}/out`
26
+ `ocropus lines2fsts #{tmp}/out`
27
+ `ocropus buildhtml #{tmp}/out > #{tmp}/output.html`
28
+ f = document.file
29
+ doc.remove_all!
30
+ doc.set :file, "#{tmp}/output.html"
31
+ doc.read(:html)
32
+ doc.set :file, f
33
+ end
34
+ end
35
+ options[:silent] ?
36
+ silence_stdout { read.call(document) } :
37
+ read.call(document)
38
+ document
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -17,7 +17,6 @@ module Treat
17
17
  raise "Couldn't unzip dot file #{document.file}!" unless f
18
18
  xml_h = OOXmlHandler.new
19
19
  REXML::Document.parse_stream(f, xml_h)
20
- puts xml_h.plain_text
21
20
  document << Treat::Entities::Entity.from_string(xml_h.plain_text)
22
21
  document
23
22
  end
@@ -39,7 +38,7 @@ module Treat
39
38
  s = s.strip
40
39
  if s.length > 0
41
40
  @plain_text << s
42
- @plain_text << "\n"
41
+ @plain_text << "\n\n"
43
42
  end
44
43
  end
45
44
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  module Treat
2
3
  module Formatters
3
4
  module Readers
@@ -10,7 +11,14 @@ module Treat
10
11
  def self.read(document, options = {})
11
12
  create_temp_file(:txt) do |tmp|
12
13
  `pdftotext #{document.file} #{tmp} `.strip
13
- document << Treat::Entities::Entity.from_string(File.read(tmp))
14
+ f = File.read(tmp)
15
+ f.gsub!("\t\r ", '')
16
+ f.gsub!('-­‐', '-')
17
+ f.gsub!("\n\n", '#keep#')
18
+ f.gsub!("\n", ' ')
19
+ f.gsub!(" ", ' ')
20
+ f.gsub!('#keep#', "\n\n")
21
+ document << Treat::Entities::Entity.from_string(f)
14
22
  end
15
23
  document
16
24
  end
@@ -0,0 +1,40 @@
1
+ module Treat
2
+ module Formatters
3
+ module Readers
4
+ class XML
5
+ require 'stanford-core-nlp'
6
+ require 'cgi'
7
+ # By default, strip the XML markup without backing it up.
8
+ DefaultOptions = { :clean => true, :backup => false }
9
+ @@xml_cleaner = nil
10
+ # Read the XML document and strip it of its markup.
11
+ # Also splits the text into sentences and tokenizes it?
12
+ #
13
+ # Options:
14
+ #
15
+ # - (Boolean) :clean => whether to strip XML markup.
16
+ # - (Boolean) :backup => whether to backup the XML
17
+ # markup while cleaning.
18
+ def self.read(document, options = {})
19
+ options = DefaultOptions.merge(options)
20
+ document << Treat::Entities::Entity.from_string(File.read(document.file))
21
+ if options[:clean]
22
+ @@xml_cleaner ||= StanfordCoreNLP.load(:tokenize, :ssplit, :cleanxml)
23
+ document.each do |zone|
24
+ text = StanfordCoreNLP::Text.new(zone.to_s)
25
+ @@xml_cleaner.annotate(text)
26
+ sentences = []
27
+ text.get(:sentences) do |sentence|
28
+ sentences << Treat::Entities::Sentence.from_string(sentence.to_s)
29
+ end
30
+ val = sentences.join(' ')
31
+ zone.set :xml_value, CGI.escapeHTML(text.to_s) if options[:backup]
32
+ zone.value = val
33
+ end
34
+ end
35
+ document
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -6,44 +6,80 @@ module Treat
6
6
  # Require the Nokogiri XML parser.
7
7
  require 'nokogiri'
8
8
  # Serialize an entity tree in XML format.
9
+ #
10
+ # Options:
11
+ # - (String) :file => a file to write to.
9
12
  def self.serialize(entity, options = {})
10
- options = {:indent => 0} if options[:indent].nil?
13
+ options = options.merge({:indent => 0}) if options[:indent].nil?
14
+ indent = options[:indent]
11
15
  if options[:indent] == 0
12
- # enc = entity.encoding(:r_chardet19).to_s.gsub('_', '-').upcase
13
- string = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\" ?>" # Fix
16
+ enc = entity.to_s.encoding.to_s.downcase
17
+ string = "<?xml version=\"1.0\" encoding=\"#{enc}\" standalone=\"no\" ?>\n<treat>"
14
18
  else
15
19
  string = ''
16
20
  end
17
21
  spaces = ''
18
22
  options[:indent].times { spaces << ' ' }
19
- attributes = ''
23
+ attributes = " id='#{entity.id}' "
20
24
  if !entity.features.nil? && entity.features.size != 0
21
- attributes = ' '
25
+ attributes << ' '
22
26
  entity.features.each_pair do |feature, value|
23
27
  if value.is_a? Entities::Entity
24
28
  attributes << "#{feature}='#{value.id}' "
25
29
  else
26
- attributes << "#{feature}='#{value}' "
30
+ attributes << "#{feature}='#{escape(value)}' "
27
31
  end
28
32
  end
29
- entity.edges.each_pair do |id,edge|
30
- attributes << "#{edge}='#{id}' "
33
+ attributes << "dependencies='"
34
+ a = []
35
+ entity.dependencies.each do |dependency|
36
+ a << ("{target: #{dependency.target}, type: #{dependency.type}, " +
37
+ "directed: #{dependency.directed}, " +
38
+ "direction: #{dependency.direction}}" )
31
39
  end
40
+ # Structs.
41
+ attributes << a.join('--') + "'"
32
42
  end
33
43
  tag = entity.class.to_s.split('::')[-1].downcase
34
- string += "\n#{spaces}<#{tag}#{attributes[0..-2]}>"
44
+ unless entity.is_a?(Treat::Entities::Token)
45
+ string += "\n"
46
+ end
47
+ string += "#{spaces}<#{tag}#{attributes}>"
35
48
  if entity.has_children?
36
49
  options[:indent] += 1
37
- entity.children.each do |child|
38
- string = string + serialize(child, options)
50
+ entity.children.each do |child|
51
+ string =
52
+ string +
53
+ serialize(child, options)
39
54
  end
40
55
  options[:indent] -= 1
41
56
  else
42
- string = string + "\n#{spaces}#{entity.value}"
57
+ string = string + "#{escape(entity.value)}"
58
+ end
59
+ unless entity.is_a?(Treat::Entities::Token)
60
+ string += "\n#{spaces}"
43
61
  end
44
- string + "\n#{spaces}</#{tag}>"
62
+ string += "</#{tag}>\n"
63
+ if indent == 0
64
+ string += "\n</treat>"
65
+ if options[:file]
66
+ File.open(options[:file], 'w') { |f| f.write(string) }
67
+ end
68
+ # puts string
69
+ end
70
+ string
71
+ end
72
+
73
+ def self.escape(input)
74
+ result = input.to_s.dup
75
+ result.gsub!("&", "&amp;")
76
+ result.gsub!("<", "&lt;")
77
+ result.gsub!(">", "&gt;")
78
+ result.gsub!("'", "&apos;")
79
+ result.gsub!("\"", "&quot;")
80
+ result
45
81
  end
46
82
  end
47
83
  end
48
84
  end
49
- end
85
+ end
@@ -7,9 +7,14 @@ module Treat
7
7
  class YAML
8
8
  # Serialize an entity in YAML format.
9
9
  #
10
- # Options: none.
10
+ # Options:
11
+ # - (String) :file => a file to write to.
11
12
  def self.serialize(entity, options = {})
12
- ::Psych.dump(entity)
13
+ yaml = ::Psych.dump(entity)
14
+ if options[:file]
15
+ File.open(options[:file], 'w') { |f| f.write(yaml) }
16
+ end
17
+ yaml
13
18
  end
14
19
  end
15
20
  end
@@ -11,6 +11,8 @@ module Treat
11
11
  def self.unserialize(document, options = {})
12
12
  # Read in the XML file.
13
13
  xml = File.read(document.file)
14
+ xml.gsub!('<treat>', '')
15
+ xml.gsub!('</treat>', '')
14
16
  xml_reader = Nokogiri::XML::Reader.from_memory(xml)
15
17
  current_element = nil
16
18
  previous_depth = 0
@@ -33,13 +35,33 @@ module Treat
33
35
  end
34
36
 
35
37
  id = nil; value = ''
36
- attributes = {}; edges = {}
37
- unless xml_reader.attributes.empty?
38
+ attributes = {}
39
+ dependencies = []
40
+ unless xml_reader.attributes.size == 0
38
41
  xml_reader.attributes.each_pair do |k,v|
39
42
  if k == 'id'
40
- id = v
41
- elsif k == 'edges'
42
- edges = v
43
+ id = v.to_i
44
+ elsif k == 'dependencies'
45
+ a = v.split('--')
46
+ a.each do |b|
47
+ c = b.split(';')
48
+ c.each do |dep|
49
+ vals = []
50
+ dep.split(',').each do |name_val|
51
+ name_val = name_val[0..-2] if name_val[-1] == '}'
52
+ d = name_val.split(':')[1]
53
+ vals << d.strip if d
54
+ end
55
+
56
+ target, type, directed, direction = *vals
57
+ dependencies << [
58
+ target.to_i,
59
+ type,
60
+ (directed == 'true' ? true : false),
61
+ direction.to_i
62
+ ]
63
+ end
64
+ end
43
65
  elsif k == 'value'
44
66
  value = v
45
67
  else
@@ -60,9 +82,13 @@ module Treat
60
82
  end
61
83
  current_element.features = attributes
62
84
  current_element.features = attributes
63
- current_element.edges = edges
85
+ dependencies.each do |dependency|
86
+ target, type, directed, direction = *dependency
87
+ current_element.link(target, type, directed, direction)
88
+ end
64
89
  else
65
- current_value = xml_reader.value.strip
90
+ current_value = xml_reader.value ?
91
+ xml_reader.value.strip : ''
66
92
  if current_value && current_value != ''
67
93
  current_element.value = current_value
68
94
  current_element.register_token(current_element)
@@ -2,7 +2,16 @@ module Treat
2
2
  module Formatters
3
3
  module Visualizers
4
4
  class Dot
5
- DefaultOptions = {colors: {}, :features => :all}
5
+ require 'date'
6
+ DefaultOptions = {
7
+ :colors => {},
8
+ :features => :all,
9
+ :file => nil,
10
+ :remove_types => [],
11
+ :remove_features => [],
12
+ :colors => nil,
13
+ :first => true # For internal purposes only.
14
+ }
6
15
  # Create the top-most graph structure
7
16
  # and delegate the creation of the graph
8
17
  # nodes to to_dot.
@@ -11,53 +20,114 @@ module Treat
11
20
  string = "graph {"
12
21
  string << self.to_dot(entity, options)
13
22
  string << "\n}"
23
+ if options[:file]
24
+ File.open(options[:file], 'w') { |f| f.write(string) }
25
+ end
26
+ string
14
27
  end
15
28
  # dot -Tpdf test4.dot > test4.pdf
16
29
  def self.to_dot(entity, options)
30
+ # Filter out specified types.
31
+ match_types = lambda do |t1, t2s|
32
+ f = false
33
+ t2s.each { |t2| f = true if Treat::Entities.match_types[t1][t2] }
34
+ f
35
+ end
36
+ return '' if match_types.call(entity.type, options[:remove_types])
17
37
  # Id
18
38
  string = ''
19
39
  label = ''
40
+ sv = entity.short_value.inspect[1..-2]
20
41
  string = "\n#{entity.id} ["
21
- # Value
22
- if entity.is_a?(Treat::Entities::Token)
23
- label = entity.to_s
24
- else
25
- label = entity.type.to_s.capitalize + " "
26
- if entity.is_leaf?
27
- label = entity.short_value.gsub(' [...]', " [...] \\n")
28
- end
29
- end
42
+ label = "#{entity.type.to_s.capitalize}\\n\\\"#{sv}\\\""
43
+ label.gsub!(' [...]', " [...] \\n")
30
44
  # Features
31
45
  if entity.has_features?
32
46
  unless options[:features] == :none
33
47
  label << "\\n"
34
48
  entity.features.each do |feature, value|
49
+ next if options[:remove_features].include?(feature)
35
50
  if options[:features] == :all ||
36
51
  options[:features].include?(feature)
37
52
  if value.is_a?(Treat::Entities::Entity)
38
- label << "\\n#{feature}=\\\"*#{value.id}\\\","
53
+ label << "\\n#{feature}: \\\"*#{value.id}\\\""
54
+ elsif value.is_a?(Struct)
55
+ label << "\\n#{feature}: \\n\{ "
56
+ value.members.each do |member|
57
+ v = value.send(member)
58
+ v = v.to_s if v.is_a?(DateTime)
59
+ v = "*#{v.id}" if v.is_a?(Treat::Entities::Entity)
60
+ v = v ? v.inspect : ' -- '
61
+ v.gsub!('[', '\[')
62
+ v.gsub!('{', '\}')
63
+ v.gsub!(']', '\]')
64
+ v.gsub!('}', '\}')
65
+ v.gsub!('"', '\"')
66
+ label << "#{member}: #{v},\\n"
67
+ end
68
+ label = label[0..-4] unless label[-2] == '{'
69
+ label << "\},"
70
+ elsif value.is_a?(Hash)
71
+ label << "\\n#{feature}: \\n\{ "
72
+ value.each do |k,v|
73
+ v = v ? v.inspect : ' -- '
74
+ v.gsub!('[', '\[')
75
+ v.gsub!('{', '\}')
76
+ v.gsub!(']', '\]')
77
+ v.gsub!('}', '\}')
78
+ v.gsub!('"', '\"')
79
+ label << "#{k}: #{v},\\n"
80
+ end
81
+ label = label[0..-4] unless label[-2] == '{'
82
+ label << "\},"
83
+ elsif value.is_a?(Array)
84
+ label << "\\n#{feature}: \\n\[ "
85
+ value.each do |e|
86
+ e = "*#{e.id}" if e.is_a?(Treat::Entities::Entity)
87
+ label << "#{e},\\n"
88
+ end
89
+ label = label[0..-4] unless label[-2] == '['
90
+ label << " \]"
39
91
  else
40
- label << "\\n#{feature}=\\\"#{value}\\\","
92
+ label << "\\n#{feature}: \\\"#{value}\\\""
41
93
  end
42
94
  end
43
95
  end
44
96
  end
45
97
  end
98
+ color = nil
99
+ if options[:colors]
100
+ options[:colors].each do |col, lambda|
101
+ color = col.to_s if lambda.call(entity)
102
+ break if color
103
+ end
104
+ end
46
105
  label = label[0..-2] if label[-1] == ','
47
- string << "label=\"#{label}\"]"
106
+ string << "label=\"#{label}\",color=\"#{color.to_s}\"]"
107
+ string.gsub!('\\\\""]', '\\""]')
108
+ string.gsub('\"\""]', '\""]')
48
109
  # Parent-child relationships.
49
110
  if entity.has_parent?
50
- string << "\n#{entity.parent.id} -- #{entity.id};"
111
+ unless options[:first] == true
112
+ string << "\n#{entity.parent.id} -- #{entity.id};"
113
+ end
51
114
  end
52
- # Edges.
53
- if entity.has_edges?
54
- entity.edges.each_pair do |target, type|
55
- string << "\n#{entity.id} -- #{target}"
56
- string << "[label=#{type},dir=forward,"
57
- string << "arrowhead=\"odiamond\"]"
115
+ # Dependencies.
116
+ if entity.has_dependencies?
117
+ entity.dependencies.each do |dependency|
118
+ dir = ''
119
+ if dependency.directed == true
120
+ dir = dependency.direction == 1 ? 'forward' : 'back'
121
+ dir = ",dir=#{dir}"
122
+ else
123
+ dir = ",dir=both"
124
+ end
125
+ string << "\n#{entity.id} -- #{dependency.target}"
126
+ string << "[label=#{dependency.type}#{dir}]"
58
127
  end
59
128
  end
60
129
  # Recurse.
130
+ options[:first] = false
61
131
  if entity.has_children?
62
132
  entity.each do |child|
63
133
  string << self.to_dot(child, options)