proiel 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015-2016 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2018 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -7,6 +7,95 @@ module PROIEL
7
7
  module PROIELXML
8
8
  # @api private
9
9
  module Reader
10
+ class DictionarySource
11
+ include SAXMachine
12
+
13
+ attribute :idref, required: true
14
+ attribute :license, required: false
15
+ attribute :n, required: false
16
+ end
17
+
18
+ class DictionaryGloss
19
+ include SAXMachine
20
+
21
+ attribute :language, required: true
22
+ value :gloss
23
+ end
24
+
25
+ class DictionaryHomograph
26
+ include SAXMachine
27
+
28
+ attribute :lemma, required: true
29
+ attribute :'part-of-speech', as: :part_of_speech, required: true
30
+ end
31
+
32
+ class DictionarySlot2
33
+ include SAXMachine
34
+
35
+ attribute :form, required: true
36
+ attribute :n, required: true
37
+ end
38
+
39
+ class DictionarySlot1
40
+ include SAXMachine
41
+
42
+ elements :slot2, as: :slot2s, class: DictionarySlot2
43
+
44
+ attribute :morphology, required: true
45
+ end
46
+
47
+ class DictionaryArgument
48
+ include SAXMachine
49
+
50
+ attribute :relation, required: true
51
+ attribute :lemma, required: false
52
+ attribute :'part-of-speech', as: :part_of_speech, required: false
53
+ attribute :mood, required: false
54
+ attribute :case, required: false
55
+ end
56
+
57
+ class DictionaryToken
58
+ include SAXMachine
59
+
60
+ attribute :idref, required: true
61
+ attribute :flags, required: false
62
+ end
63
+
64
+ class DictionaryFrame
65
+ include SAXMachine
66
+
67
+ # We skip the intermediate grouping elements 'arguments' and 'tokens'
68
+ elements :argument, as: :arguments, class: DictionaryArgument
69
+ elements :token, as: :tokens, class: DictionaryToken
70
+ end
71
+
72
+ class DictionaryLemma
73
+ include SAXMachine
74
+
75
+ attribute :lemma, required: true
76
+ attribute :'part-of-speech', as: :part_of_speech, required: true
77
+ attribute :n, required: false
78
+
79
+ # We skip the intermediate grouping elements 'distribution', 'glosses', 'homographs', 'paradigm' and 'valency'
80
+ elements :source, as: :distribution, class: DictionarySource
81
+ elements :gloss, as: :glosses, class: DictionaryGloss
82
+ elements :homograph, as: :homographs, class: DictionaryHomograph
83
+ elements :slot1, as: :paradigm, class: DictionarySlot1
84
+ elements :frame, as: :valency, class: DictionaryFrame
85
+ end
86
+
87
+ # Parsing class for `dictionary` elements.
88
+ class Dictionary
89
+ include SAXMachine
90
+
91
+ attribute :language, required: true
92
+ attribute :dialect, required: false
93
+
94
+ # We skip the intermediate grouping elements 'sources' and 'lemmata'
95
+ elements :source, as: :sources, class: DictionarySource
96
+ elements :lemma, as: :lemmata, class: DictionaryLemma
97
+ end
98
+
10
99
  # Parsing class for `slash` elements.
11
100
  class Slash
12
101
  include SAXMachine
@@ -15,6 +104,22 @@ module PROIEL
15
104
  attribute :relation, required: true
16
105
  end
17
106
 
107
+ # Parsing class for `semantic-tag` elements.
108
+ class SemanticTag
109
+ include SAXMachine
110
+
111
+ attribute :attribute, required: true
112
+ attribute :value, required: true
113
+ end
114
+
115
+ # Parsing class for `note` elements.
116
+ class Note
117
+ include SAXMachine
118
+
119
+ attribute :originator, required: true
120
+ value :content
121
+ end
122
+
18
123
  # Parsing class for `token` elements.
19
124
  class Token
20
125
  include SAXMachine
@@ -37,6 +142,8 @@ module PROIEL
37
142
  attribute :'foreign-ids', as: :foreign_ids
38
143
 
39
144
  elements :slash, as: :slashes, class: Slash
145
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
146
+ elements :note, as: :notes, class: Note
40
147
  end
41
148
 
42
149
  # Parsing class for `sentence` elements.
@@ -54,6 +161,7 @@ module PROIEL
54
161
  attribute :'presentation-after', as: :presentation_after
55
162
 
56
163
  elements :token, as: :tokens, class: Token
164
+ elements :note, as: :notes, class: Note
57
165
  end
58
166
 
59
167
  # Parsing class for `div` elements.
@@ -67,6 +175,7 @@ module PROIEL
67
175
 
68
176
  element :title
69
177
  elements :sentence, as: :sentences, class: Sentence
178
+ elements :note, as: :notes, class: Note
70
179
  end
71
180
 
72
181
  # Parsing class for `source` elements.
@@ -74,10 +183,12 @@ module PROIEL
74
183
  include SAXMachine
75
184
 
76
185
  attribute :id, required: true
77
- attribute :'alignment-id', as: :alignment_id, required: false
186
+ attribute :'alignment-id', as: :alignment_id, class: String, required: false
78
187
  attribute :language, required: true
188
+ attribute :dialect, required: false
79
189
 
80
190
  element :title
191
+ element :alternative_title
81
192
  element :author
82
193
  element :citation_part
83
194
  element :principal
@@ -107,7 +218,11 @@ module PROIEL
107
218
  element :printed_text_publisher
108
219
  element :printed_text_place
109
220
  element :printed_text_date
221
+ element :chronology_composition
222
+ element :chronology_manuscript
223
+
110
224
  elements :div, as: :divs, class: Div
225
+ elements :note, as: :notes, class: Note
111
226
  end
112
227
 
113
228
  # Parsing class for `relations/value` elements.
@@ -174,6 +289,25 @@ module PROIEL
174
289
  attribute :summary, required: true
175
290
  end
176
291
 
292
+ # Parsing class for `lemma` elements.
293
+ class Lemma
294
+ include SAXMachine
295
+
296
+ attribute :form, required: true
297
+ attribute :'part-of-speech', as: :part_of_speech, required: true
298
+ attribute :gloss, required: false
299
+
300
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
301
+ elements :note, as: :notes, class: Note
302
+ end
303
+
304
+ # Parsing class for `dictionary` elements. NOTE(review): a `Dictionary` class is already declared earlier in this file (new line 88, collecting `lemma` as `lemmata`); if both sit in the same module this reopens that class and registers a second `lemma` collection as `lemmas` -- confirm this is intended.
305
+ class Dictionary
306
+ include SAXMachine
307
+
308
+ elements :lemma, as: :lemmas, class: Lemma
309
+ end
310
+
177
311
  # Parsing class for `information_statuses` elements.
178
312
  class InformationStatuses
179
313
  include SAXMachine
@@ -189,6 +323,7 @@ module PROIEL
189
323
  element :parts_of_speech, as: :parts_of_speech, class: PartsOfSpeech
190
324
  element :morphology, class: Morphology
191
325
  element :information_statuses, as: :information_statuses, class: InformationStatuses
326
+ element :dictionary, as: :dictionary, class: Dictionary
192
327
  end
193
328
 
194
329
  # Parsing class for `proiel` elements.
@@ -199,6 +334,7 @@ module PROIEL
199
334
  attribute :'schema-version', as: :schema_version, required: true
200
335
 
201
336
  elements :source, as: :sources, class: Source
337
+ elements :dictionary, as: :dictionaries, class: Dictionary
202
338
  element :annotation, class: Annotation
203
339
  end
204
340
 
@@ -16,7 +16,7 @@ module PROIEL
16
16
  # @return [String] schema version number
17
17
  #
18
18
  def self.current_proiel_xml_schema_version
19
- '2.1'
19
+ '3.0'
20
20
  end
21
21
 
22
22
  # Invalid PROIEL XML schema version error.
@@ -41,6 +41,8 @@ module PROIEL
41
41
  '2.0'
42
42
  when '2.1'
43
43
  '2.1'
44
+ when '3.0'
45
+ '3.0'
44
46
  when NilClass
45
47
  '1.0'
46
48
  else
@@ -70,7 +72,7 @@ module PROIEL
70
72
  # @raise ArgumentError
71
73
  #
72
74
  def self.proiel_xml_schema_filename(schema_version)
73
- if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1'
75
+ if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1' or schema_version == '3.0'
74
76
  File.join(File.dirname(__FILE__),
75
77
  "proiel-#{schema_version}",
76
78
  "proiel-#{schema_version}.xsd")
@@ -113,10 +113,13 @@ module PROIEL
113
113
  # Returns the printable form of the sentence with all token forms and any
114
114
  # presentation data (presentation_before, the non-empty child tokens, presentation_after; nil parts are dropped).
115
115
  #
116
+ # @param custom_token_formatter [Proc, nil] optional formatting function for tokens
117
+ # which is passed the token as its sole argument; the value (nil by default) is forwarded as-is to each token's printable_form
118
+ #
116
119
  # @return [String] the printable form of the sentence
117
- def printable_form(options = {})
120
+ def printable_form(custom_token_formatter: nil)
118
121
  [presentation_before,
119
- @children.reject(&:is_empty?).map { |t| t.printable_form(options) },
122
+ @children.reject(&:is_empty?).map { |t| t.printable_form(custom_token_formatter: custom_token_formatter) },
120
123
  presentation_after].compact.join
121
124
  end
122
125
 
data/lib/proiel/source.rb CHANGED
@@ -15,6 +15,9 @@ module PROIEL
15
15
  # @return [String] language of the source as an ISO 639-3 language tag
16
16
  attr_reader :language
17
17
 
18
+ # @return [String] dialect of the source
19
+ attr_reader :dialect
20
+
18
21
  # @return [DateTime] export time for the source
19
22
  attr_reader :export_time
20
23
 
@@ -26,7 +29,7 @@ module PROIEL
26
29
  attr_reader :alignment_id
27
30
 
28
31
  # Creates a new source object.
29
- def initialize(parent, id, export_time, language, metadata, alignment_id, &block)
32
+ def initialize(parent, id, export_time, language, dialect, metadata, alignment_id, &block)
30
33
  @treebank = parent
31
34
  @id = id.freeze
32
35
 
@@ -34,6 +37,7 @@ module PROIEL
34
37
  @export_time = export_time.nil? ? nil : DateTime.parse(export_time).freeze
35
38
 
36
39
  @language = language.freeze
40
+ @dialect = dialect ? dialect.freeze : nil
37
41
  @metadata = metadata.freeze
38
42
 
39
43
  raise ArgumentError, 'string or nil expected' unless alignment_id.nil? or alignment_id.is_a?(String)
@@ -50,9 +54,12 @@ module PROIEL
50
54
  # Returns the printable form of the source with all token forms and any
51
55
  # presentation data, built by joining the printable forms of its children (nil results are dropped).
52
56
  #
57
+ # @param custom_token_formatter [Proc, nil] optional formatting function for tokens
58
+ # which is passed the token as its sole argument; the value (nil by default) is forwarded as-is to each child's printable_form
59
+ #
53
60
  # @return [String] the printable form of the source
54
- def printable_form(options = {})
55
- @children.map { |d| d.printable_form(options) }.compact.join
61
+ def printable_form(custom_token_formatter: nil)
62
+ @children.map { |d| d.printable_form(custom_token_formatter: custom_token_formatter) }.compact.join
56
63
  end
57
64
 
58
65
  # Accesses metadata fields.
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2018 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -23,9 +23,13 @@ module PROIEL
23
23
  # @return [Array<Source>] sources in the treebank
24
24
  attr_reader :sources
25
25
 
26
+ # @return [Array<Dictionary>] dictionaries in the treebank
27
+ attr_reader :dictionaries
28
+
26
29
  # Available metadata elements for sources.
27
30
  METADATA_ELEMENTS = %i(
28
31
  title
32
+ alternative_title
29
33
  author
30
34
  citation_part
31
35
  principal
@@ -55,6 +59,8 @@ module PROIEL
55
59
  printed_text_publisher
56
60
  printed_text_place
57
61
  printed_text_date
62
+ chronology_composition
63
+ chronology_manuscript
58
64
  )
59
65
 
60
66
  # Creates a new treebank object.
@@ -62,6 +68,7 @@ module PROIEL
62
68
  @annotation_schema = nil
63
69
  @schema_version = nil
64
70
  @sources = []
71
+ @dictionaries = []
65
72
 
66
73
  @source_index = {}
67
74
  @div_index = {}
@@ -85,12 +92,18 @@ module PROIEL
85
92
  tf = PROIELXML::Reader.parse_io(f)
86
93
 
87
94
  tf.proiel.sources.each do |s|
88
- @sources << Source.new(self, s.id, tf.proiel.export_time, s.language,
95
+ @sources << Source.new(self, s.id, tf.proiel.export_time, s.language, s.dialect,
89
96
  bundle_metadata(s), s.alignment_id) do |source|
90
97
  build_divs(s, source)
91
98
  end
92
99
 
93
- index_objects!(@sources.last)
100
+ index_source_objects!(@sources.last)
101
+ end
102
+
103
+ tf.proiel.dictionaries.each do |s|
104
+ @dictionaries << Dictionary.new(self, tf.proiel.export_time, s.language, s.dialect, s)
105
+
106
+ index_dictionary_objects!(@dictionaries.last)
94
107
  end
95
108
 
96
109
  annotation_schema = AnnotationSchema.new(tf.proiel.annotation)
@@ -198,7 +211,7 @@ module PROIEL
198
211
  end
199
212
  end
200
213
 
201
- def index_objects!(source)
214
+ def index_source_objects!(source)
202
215
  @source_index[source.id] = source
203
216
 
204
217
  source.divs.each do |div|
@@ -213,5 +226,9 @@ module PROIEL
213
226
  end
214
227
  end
215
228
  end
229
+
230
+ def index_dictionary_objects!(dictionary)
231
+ # TODO
232
+ end
216
233
  end
217
234
  end
@@ -5,5 +5,5 @@
5
5
  #++
6
6
  module PROIEL
7
7
  # Gem version
8
- VERSION = '1.2.1'
8
+ VERSION = '1.3.0'
9
9
  end
@@ -2,7 +2,7 @@ module PROIEL
2
2
  module Visualization
3
3
  module Graphviz
4
4
  DEFAULT_GRAPHVIZ_BINARY = 'dot'.freeze
5
- DEFAULT_TEMPLATES = %i(classic linearized packed)
5
+ DEFAULT_TEMPLATES = %i(classic linearized packed modern aligned-modern)
6
6
  SUPPORTED_OUTPUT_FORMATS = %i(png svg)
7
7
 
8
8
  class GraphvizError < Exception
@@ -21,7 +21,7 @@ module PROIEL
21
21
  def self.generate(template, graph, output_format, options = {})
22
22
  raise ArgumentError, 'string or symbol expected' unless template.is_a?(String) or template.is_a?(Symbol)
23
23
 
24
- dot_code = generate_dot(template, graph)
24
+ dot_code = generate_dot(template, graph, options)
25
25
 
26
26
  if output_format.to_sym == :dot
27
27
  dot_code
@@ -58,7 +58,9 @@ module PROIEL
58
58
  result
59
59
  end
60
60
 
# Renders a Graphviz template to DOT source for the given graph.
#
# @param template [String, Symbol] template name, resolved to a file by
#   +template_filename+
# @param graph [Object] graph object handed to the template context
# @param options [Hash] rendering options; +:direction+ may be 'TD' or 'LR'
#   and falls back to 'TD' when absent or nil
#
# @return the result of +TemplateContext#generate+ (presumably the DOT
#   source as a String -- confirm against TemplateContext)
#
# @raise [ArgumentError] if +:direction+ is given but is not 'TD' or 'LR'
def self.generate_dot(template, graph, options)
  direction = options[:direction]

  # %w(...) builds the array ['TD', 'LR']. A bare %(...) literal is the
  # string "TD LR", whose include? does substring matching and therefore
  # accepts bogus values such as 'T', 'D L' or ''.
  raise ArgumentError, 'invalid direction' unless direction.nil? || %w(TD LR).include?(direction)

  filename = template_filename(template)

  content = File.read(filename)

  # Trim mode '-' enables the <%- ... -%> whitespace-trimming tags.
  template = ERB.new(content, nil, '-')
  # Attach the filename so template errors are reported against the file.
  template.filename = filename

  TemplateContext.new(graph, direction || 'TD').generate(template)
end
71
73
 
72
74
  class TemplateContext
73
- def initialize(graph)
75
+ def initialize(graph, direction, title = '')
74
76
  @graph = graph
77
+ @direction = direction
78
+ @title = title
75
79
  end
76
80
 
77
81
  def generate(template)
@@ -0,0 +1,83 @@
1
+ digraph "<%= @title -%>" {
2
+ charset="UTF-8";
3
+ graph [truecolor=true,bgcolor=transparent];
4
+ rankdir="<%= @direction -%>";
5
+ nodesep=0.1;
6
+ ranksep=0.25;
7
+
8
+ <%- @graph.left.each_with_index do |tokens, i| -%>
9
+ <%= "rootL#{i}" -%> [label="",shape=point];
10
+
11
+ <%- tokens.select { |t| t.empty_token_sort != 'P' }.each do |token| -%>
12
+ <%- if token.empty_token_sort -%>
13
+ <%= node token.id, token.relation.to_s.upcase, shape: :none, fontcolor: :gray -%>
14
+ <%- else -%>
15
+ <%= node token.id, token.relation.to_s.upcase, shape: :none -%>
16
+ <%- end -%>
17
+
18
+ <%- if token.relation -%>
19
+ <%= edge (token.head ? token.head.id : "rootL#{i}"), token.id, '', weight: 1.0, color: :orange, arrowhead: :none -%>
20
+ <%- end -%>
21
+
22
+ <%- token.slashes.each do |(relation, target)| -%>
23
+ <%= edge token.id, target, relation.to_s.upcase, weight: 0.0, fontcolor: :blue, color: :blue, style: :dashed %>
24
+ <%- end -%>
25
+ <%- end -%>
26
+
27
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
28
+ <%= edge token.id, "T#{token.id}", nil, weight: 10, arrowhead: :none -%>
29
+ <%- end -%>
30
+ <%- end -%>
31
+
32
+ {
33
+ rank="same";
34
+
35
+ <%- @graph.left.each do |tokens| -%>
36
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
37
+ <%= node "T#{token.id}", token.form, shape: :none, fontcolor: :blue, tooltip: [token.lemma, token.part_of_speech, token.morphology].join("\n") -%>
38
+ <%- end -%>
39
+
40
+ <%= tokens.reject(&:empty_token_sort).map { |token| "T#{token.id}" }.join('->') -%> [style="invis"];
41
+ <%- end -%>
42
+ }
43
+
44
+ <%- @graph.right.each_with_index do |tokens, i| -%>
45
+ <%= "rootR#{i}" -%> [label="",shape=point];
46
+
47
+ <%- tokens.select { |t| t.empty_token_sort != 'P' }.each do |token| -%>
48
+ <%- if token.empty_token_sort -%>
49
+ <%= node token.id, token.relation.to_s.upcase, shape: :none, fontcolor: :gray -%>
50
+ <%- else -%>
51
+ <%= node token.id, token.relation.to_s.upcase, shape: :none -%>
52
+ <%- end -%>
53
+
54
+ <%- if token.relation -%>
55
+ <%= edge token.id, (token.head ? token.head.id : "rootR#{i}"), '', weight: 1.0, color: :orange, arrowhead: :none -%>
56
+ <%- end -%>
57
+
58
+ <%- token.slashes.each do |(relation, target)| -%>
59
+ <%= edge token.id, target, relation.to_s.upcase, weight: 0.0, fontcolor: :blue, color: :blue, style: :dashed %>
60
+ <%- end -%>
61
+ <%- end -%>
62
+
63
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
64
+ <%= edge "T#{token.id}", token.id, nil, weight: 10, arrowhead: :none -%>
65
+ <%- end -%>
66
+ <%- end -%>
67
+
68
+ {
69
+ rank="same";
70
+
71
+ <%- @graph.right.each do |tokens| -%>
72
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
73
+ <%= node "T#{token.id}", token.form, shape: :none, fontcolor: :blue, tooltip: [token.lemma, token.part_of_speech, token.morphology].join("\n") -%>
74
+ <%- end -%>
75
+
76
+ <%= tokens.reject(&:empty_token_sort).map { |token| "T#{token.id}" }.join('->') -%> [style="invis"];
77
+ <%- end -%>
78
+ }
79
+
80
+ <%- @graph.alignments.each do |x, y| -%>
81
+ <%= "T#{x}" -%> -> <%= "T#{y}" -%> [color=blue,dir=none];
82
+ <%- end -%>
83
+ }