proiel 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015-2016 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2018 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -7,6 +7,95 @@ module PROIEL
7
7
  module PROIELXML
8
8
  # @api private
9
9
  module Reader
10
+ class DictionarySource
11
+ include SAXMachine
12
+
13
+ attribute :idref, required: true
14
+ attribute :license, required: false
15
+ attribute :n, required: false
16
+ end
17
+
18
+ class DictionaryGloss
19
+ include SAXMachine
20
+
21
+ attribute :language, required: true
22
+ value :gloss
23
+ end
24
+
25
+ class DictionaryHomograph
26
+ include SAXMachine
27
+
28
+ attribute :lemma, required: true
29
+ attribute :'part-of-speech', as: :part_of_speech, required: true
30
+ end
31
+
32
+ class DictionarySlot2
33
+ include SAXMachine
34
+
35
+ attribute :form, required: true
36
+ attribute :n, required: true
37
+ end
38
+
39
+ class DictionarySlot1
40
+ include SAXMachine
41
+
42
+ elements :slot2, as: :slot2s, class: DictionarySlot2
43
+
44
+ attribute :morphology, required: true
45
+ end
46
+
47
+ class DictionaryArgument
48
+ include SAXMachine
49
+
50
+ attribute :relation, required: true
51
+ attribute :lemma, required: false
52
+ attribute :'part-of-speech', as: :part_of_speech, required: false
53
+ attribute :mood, required: false
54
+ attribute :case, required: false
55
+ end
56
+
57
+ class DictionaryToken
58
+ include SAXMachine
59
+
60
+ attribute :idref, required: true
61
+ attribute :flags, required: false
62
+ end
63
+
64
+ class DictionaryFrame
65
+ include SAXMachine
66
+
67
+ # We skip the intermediate grouping elements 'arguments' and 'tokens'
68
+ elements :argument, as: :arguments, class: DictionaryArgument
69
+ elements :token, as: :tokens, class: DictionaryToken
70
+ end
71
+
72
+ class DictionaryLemma
73
+ include SAXMachine
74
+
75
+ attribute :lemma, required: true
76
+ attribute :'part-of-speech', as: :part_of_speech, required: true
77
+ attribute :n, required: false
78
+
79
+ # We skip the intermediate grouping elements 'distribution', 'glosses', 'homographs', 'paradigm' and 'valency'
80
+ elements :source, as: :distribution, class: DictionarySource
81
+ elements :gloss, as: :glosses, class: DictionaryGloss
82
+ elements :homograph, as: :homographs, class: DictionaryHomograph
83
+ elements :slot1, as: :paradigm, class: DictionarySlot1
84
+ elements :frame, as: :valency, class: DictionaryFrame
85
+ end
86
+
87
+ # Parsing class for `dictionary` elements.
88
+ class Dictionary
89
+ include SAXMachine
90
+
91
+ attribute :language, required: true
92
+ attribute :dialect, required: false
93
+
94
+ # We skip the intermediate grouping elements 'sources' and 'lemmata'
95
+ elements :source, as: :sources, class: DictionarySource
96
+ elements :lemma, as: :lemmata, class: DictionaryLemma
97
+ end
98
+
10
99
  # Parsing class for `slash` elements.
11
100
  class Slash
12
101
  include SAXMachine
@@ -15,6 +104,22 @@ module PROIEL
15
104
  attribute :relation, required: true
16
105
  end
17
106
 
107
+ # Parsing class for `semantic-tag` elements.
108
+ class SemanticTag
109
+ include SAXMachine
110
+
111
+ attribute :attribute, required: true
112
+ attribute :value, required: true
113
+ end
114
+
115
+ # Parsing class for `note` elements.
116
+ class Note
117
+ include SAXMachine
118
+
119
+ attribute :originator, required: true
120
+ value :content
121
+ end
122
+
18
123
  # Parsing class for `token` elements.
19
124
  class Token
20
125
  include SAXMachine
@@ -37,6 +142,8 @@ module PROIEL
37
142
  attribute :'foreign-ids', as: :foreign_ids
38
143
 
39
144
  elements :slash, as: :slashes, class: Slash
145
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
146
+ elements :note, as: :notes, class: Note
40
147
  end
41
148
 
42
149
  # Parsing class for `sentence` elements.
@@ -54,6 +161,7 @@ module PROIEL
54
161
  attribute :'presentation-after', as: :presentation_after
55
162
 
56
163
  elements :token, as: :tokens, class: Token
164
+ elements :note, as: :notes, class: Note
57
165
  end
58
166
 
59
167
  # Parsing class for `div` elements.
@@ -67,6 +175,7 @@ module PROIEL
67
175
 
68
176
  element :title
69
177
  elements :sentence, as: :sentences, class: Sentence
178
+ elements :note, as: :notes, class: Note
70
179
  end
71
180
 
72
181
  # Parsing class for `source` elements.
@@ -74,10 +183,12 @@ module PROIEL
74
183
  include SAXMachine
75
184
 
76
185
  attribute :id, required: true
77
- attribute :'alignment-id', as: :alignment_id, required: false
186
+ attribute :'alignment-id', as: :alignment_id, class: String, required: false
78
187
  attribute :language, required: true
188
+ attribute :dialect, required: false
79
189
 
80
190
  element :title
191
+ element :alternative_title
81
192
  element :author
82
193
  element :citation_part
83
194
  element :principal
@@ -107,7 +218,11 @@ module PROIEL
107
218
  element :printed_text_publisher
108
219
  element :printed_text_place
109
220
  element :printed_text_date
221
+ element :chronology_composition
222
+ element :chronology_manuscript
223
+
110
224
  elements :div, as: :divs, class: Div
225
+ elements :note, as: :notes, class: Note
111
226
  end
112
227
 
113
228
  # Parsing class for `relations/value` elements.
@@ -174,6 +289,25 @@ module PROIEL
174
289
  attribute :summary, required: true
175
290
  end
176
291
 
292
+ # Parsing class for `lemma` elements.
293
+ class Lemma
294
+ include SAXMachine
295
+
296
+ attribute :form, required: true
297
+ attribute :'part-of-speech', as: :part_of_speech, required: true
298
+ attribute :gloss, required: false
299
+
300
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
301
+ elements :note, as: :notes, class: Note
302
+ end
303
+
304
+ # Parsing class for `dictionary` elements.
305
+ class Dictionary
306
+ include SAXMachine
307
+
308
+ elements :lemma, as: :lemmas, class: Lemma
309
+ end
310
+
177
311
  # Parsing class for `information_statuses` elements.
178
312
  class InformationStatuses
179
313
  include SAXMachine
@@ -189,6 +323,7 @@ module PROIEL
189
323
  element :parts_of_speech, as: :parts_of_speech, class: PartsOfSpeech
190
324
  element :morphology, class: Morphology
191
325
  element :information_statuses, as: :information_statuses, class: InformationStatuses
326
+ element :dictionary, as: :dictionary, class: Dictionary
192
327
  end
193
328
 
194
329
  # Parsing class for `proiel` elements.
@@ -199,6 +334,7 @@ module PROIEL
199
334
  attribute :'schema-version', as: :schema_version, required: true
200
335
 
201
336
  elements :source, as: :sources, class: Source
337
+ elements :dictionary, as: :dictionaries, class: Dictionary
202
338
  element :annotation, class: Annotation
203
339
  end
204
340
 
@@ -16,7 +16,7 @@ module PROIEL
16
16
  # @return [String] schema version number
17
17
  #
18
18
  def self.current_proiel_xml_schema_version
19
- '2.1'
19
+ '3.0'
20
20
  end
21
21
 
22
22
  # Invalid PROIEL XML schema version error.
@@ -41,6 +41,8 @@ module PROIEL
41
41
  '2.0'
42
42
  when '2.1'
43
43
  '2.1'
44
+ when '3.0'
45
+ '3.0'
44
46
  when NilClass
45
47
  '1.0'
46
48
  else
@@ -70,7 +72,7 @@ module PROIEL
70
72
  # @raise ArgumentError
71
73
  #
72
74
  def self.proiel_xml_schema_filename(schema_version)
73
- if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1'
75
+ if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1' or schema_version == '3.0'
74
76
  File.join(File.dirname(__FILE__),
75
77
  "proiel-#{schema_version}",
76
78
  "proiel-#{schema_version}.xsd")
@@ -113,10 +113,13 @@ module PROIEL
113
113
  # Returns the printable form of the sentence with all token forms and any
114
114
  # presentation data.
115
115
  #
116
+ # @param custom_token_formatter [Proc, nil] formatting function for tokens
117
+ # which is passed the token as its sole argument
118
+ #
116
119
  # @return [String] the printable form of the sentence
117
- def printable_form(options = {})
120
+ def printable_form(custom_token_formatter: nil)
118
121
  [presentation_before,
119
- @children.reject(&:is_empty?).map { |t| t.printable_form(options) },
122
+ @children.reject(&:is_empty?).map { |t| t.printable_form(custom_token_formatter: custom_token_formatter) },
120
123
  presentation_after].compact.join
121
124
  end
122
125
 
data/lib/proiel/source.rb CHANGED
@@ -15,6 +15,9 @@ module PROIEL
15
15
  # @return [String] language of the source as an ISO 639-3 language tag
16
16
  attr_reader :language
17
17
 
18
+ # @return [String] dialect of the source
19
+ attr_reader :dialect
20
+
18
21
  # @return [DateTime] export time for the source
19
22
  attr_reader :export_time
20
23
 
@@ -26,7 +29,7 @@ module PROIEL
26
29
  attr_reader :alignment_id
27
30
 
28
31
  # Creates a new source object.
29
- def initialize(parent, id, export_time, language, metadata, alignment_id, &block)
32
+ def initialize(parent, id, export_time, language, dialect, metadata, alignment_id, &block)
30
33
  @treebank = parent
31
34
  @id = id.freeze
32
35
 
@@ -34,6 +37,7 @@ module PROIEL
34
37
  @export_time = export_time.nil? ? nil : DateTime.parse(export_time).freeze
35
38
 
36
39
  @language = language.freeze
40
+ @dialect = dialect ? dialect.freeze : nil
37
41
  @metadata = metadata.freeze
38
42
 
39
43
  raise ArgumentError, 'string or nil expected' unless alignment_id.nil? or alignment_id.is_a?(String)
@@ -50,9 +54,12 @@ module PROIEL
50
54
  # Returns the printable form of the source with all token forms and any
51
55
  # presentation data.
52
56
  #
57
+ # @param custom_token_formatter [Proc, nil] formatting function for tokens
58
+ # which is passed the token as its sole argument
59
+ #
53
60
  # @return [String] the printable form of the source
54
- def printable_form(options = {})
55
- @children.map { |d| d.printable_form(options) }.compact.join
61
+ def printable_form(custom_token_formatter: nil)
62
+ @children.map { |d| d.printable_form(custom_token_formatter: custom_token_formatter) }.compact.join
56
63
  end
57
64
 
58
65
  # Accesses metadata fields.
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2018 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -23,9 +23,13 @@ module PROIEL
23
23
  # @return [Array<Source>] sources in the treebank
24
24
  attr_reader :sources
25
25
 
26
+ # @return [Array<Dictionary>] dictionaries in the treebank
27
+ attr_reader :dictionaries
28
+
26
29
  # Available metadata elements for sources.
27
30
  METADATA_ELEMENTS = %i(
28
31
  title
32
+ alternative_title
29
33
  author
30
34
  citation_part
31
35
  principal
@@ -55,6 +59,8 @@ module PROIEL
55
59
  printed_text_publisher
56
60
  printed_text_place
57
61
  printed_text_date
62
+ chronology_composition
63
+ chronology_manuscript
58
64
  )
59
65
 
60
66
  # Creates a new treebank object.
@@ -62,6 +68,7 @@ module PROIEL
62
68
  @annotation_schema = nil
63
69
  @schema_version = nil
64
70
  @sources = []
71
+ @dictionaries = []
65
72
 
66
73
  @source_index = {}
67
74
  @div_index = {}
@@ -85,12 +92,18 @@ module PROIEL
85
92
  tf = PROIELXML::Reader.parse_io(f)
86
93
 
87
94
  tf.proiel.sources.each do |s|
88
- @sources << Source.new(self, s.id, tf.proiel.export_time, s.language,
95
+ @sources << Source.new(self, s.id, tf.proiel.export_time, s.language, s.dialect,
89
96
  bundle_metadata(s), s.alignment_id) do |source|
90
97
  build_divs(s, source)
91
98
  end
92
99
 
93
- index_objects!(@sources.last)
100
+ index_source_objects!(@sources.last)
101
+ end
102
+
103
+ tf.proiel.dictionaries.each do |s|
104
+ @dictionaries << Dictionary.new(self, tf.proiel.export_time, s.language, s.dialect, s)
105
+
106
+ index_dictionary_objects!(@dictionaries.last)
94
107
  end
95
108
 
96
109
  annotation_schema = AnnotationSchema.new(tf.proiel.annotation)
@@ -198,7 +211,7 @@ module PROIEL
198
211
  end
199
212
  end
200
213
 
201
- def index_objects!(source)
214
+ def index_source_objects!(source)
202
215
  @source_index[source.id] = source
203
216
 
204
217
  source.divs.each do |div|
@@ -213,5 +226,9 @@ module PROIEL
213
226
  end
214
227
  end
215
228
  end
229
+
230
+ def index_dictionary_objects!(dictionary)
231
+ # TODO
232
+ end
216
233
  end
217
234
  end
@@ -5,5 +5,5 @@
5
5
  #++
6
6
  module PROIEL
7
7
  # Gem version
8
- VERSION = '1.2.1'
8
+ VERSION = '1.3.0'
9
9
  end
@@ -2,7 +2,7 @@ module PROIEL
2
2
  module Visualization
3
3
  module Graphviz
4
4
  DEFAULT_GRAPHVIZ_BINARY = 'dot'.freeze
5
- DEFAULT_TEMPLATES = %i(classic linearized packed)
5
+ DEFAULT_TEMPLATES = %i(classic linearized packed modern aligned-modern)
6
6
  SUPPORTED_OUTPUT_FORMATS = %i(png svg)
7
7
 
8
8
  class GraphvizError < Exception
@@ -21,7 +21,7 @@ module PROIEL
21
21
  def self.generate(template, graph, output_format, options = {})
22
22
  raise ArgumentError, 'string or symbol expected' unless template.is_a?(String) or template.is_a?(Symbol)
23
23
 
24
- dot_code = generate_dot(template, graph)
24
+ dot_code = generate_dot(template, graph, options)
25
25
 
26
26
  if output_format.to_sym == :dot
27
27
  dot_code
@@ -58,7 +58,9 @@ module PROIEL
58
58
  result
59
59
  end
60
60
 
61
- def self.generate_dot(template, graph)
61
+ def self.generate_dot(template, graph, options)
62
+ raise ArgumentError, 'invalid direction' unless options[:direction].nil? or %(TD LR).include?(options[:direction])
63
+
62
64
  filename = template_filename(template)
63
65
 
64
66
  content = File.read(filename)
@@ -66,12 +68,14 @@ module PROIEL
66
68
  template = ERB.new(content, nil, '-')
67
69
  template.filename = filename
68
70
 
69
- TemplateContext.new(graph).generate(template)
71
+ TemplateContext.new(graph, options[:direction] || 'TD').generate(template)
70
72
  end
71
73
 
72
74
  class TemplateContext
73
- def initialize(graph)
75
+ def initialize(graph, direction, title = '')
74
76
  @graph = graph
77
+ @direction = direction
78
+ @title = title
75
79
  end
76
80
 
77
81
  def generate(template)
@@ -0,0 +1,83 @@
1
+ digraph "<%= @title -%>" {
2
+ charset="UTF-8";
3
+ graph [truecolor=true,bgcolor=transparent];
4
+ rankdir="<%= @direction -%>";
5
+ nodesep=0.1;
6
+ ranksep=0.25;
7
+
8
+ <%- @graph.left.each_with_index do |tokens, i| -%>
9
+ <%= "rootL#{i}" -%> [label="",shape=point];
10
+
11
+ <%- tokens.select { |t| t.empty_token_sort != 'P' }.each do |token| -%>
12
+ <%- if token.empty_token_sort -%>
13
+ <%= node token.id, token.relation.to_s.upcase, shape: :none, fontcolor: :gray -%>
14
+ <%- else -%>
15
+ <%= node token.id, token.relation.to_s.upcase, shape: :none -%>
16
+ <%- end -%>
17
+
18
+ <%- if token.relation -%>
19
+ <%= edge (token.head ? token.head.id : "rootL#{i}"), token.id, '', weight: 1.0, color: :orange, arrowhead: :none -%>
20
+ <%- end -%>
21
+
22
+ <%- token.slashes.each do |(relation, target)| -%>
23
+ <%= edge token.id, target, relation.to_s.upcase, weight: 0.0, fontcolor: :blue, color: :blue, style: :dashed %>
24
+ <%- end -%>
25
+ <%- end -%>
26
+
27
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
28
+ <%= edge token.id, "T#{token.id}", nil, weight: 10, arrowhead: :none -%>
29
+ <%- end -%>
30
+ <%- end -%>
31
+
32
+ {
33
+ rank="same";
34
+
35
+ <%- @graph.left.each do |tokens| -%>
36
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
37
+ <%= node "T#{token.id}", token.form, shape: :none, fontcolor: :blue, tooltip: [token.lemma, token.part_of_speech, token.morphology].join("\n") -%>
38
+ <%- end -%>
39
+
40
+ <%= tokens.reject(&:empty_token_sort).map { |token| "T#{token.id}" }.join('->') -%> [style="invis"];
41
+ <%- end -%>
42
+ }
43
+
44
+ <%- @graph.right.each_with_index do |tokens, i| -%>
45
+ <%= "rootR#{i}" -%> [label="",shape=point];
46
+
47
+ <%- tokens.select { |t| t.empty_token_sort != 'P' }.each do |token| -%>
48
+ <%- if token.empty_token_sort -%>
49
+ <%= node token.id, token.relation.to_s.upcase, shape: :none, fontcolor: :gray -%>
50
+ <%- else -%>
51
+ <%= node token.id, token.relation.to_s.upcase, shape: :none -%>
52
+ <%- end -%>
53
+
54
+ <%- if token.relation -%>
55
+ <%= edge token.id, (token.head ? token.head.id : "rootR#{i}"), '', weight: 1.0, color: :orange, arrowhead: :none -%>
56
+ <%- end -%>
57
+
58
+ <%- token.slashes.each do |(relation, target)| -%>
59
+ <%= edge token.id, target, relation.to_s.upcase, weight: 0.0, fontcolor: :blue, color: :blue, style: :dashed %>
60
+ <%- end -%>
61
+ <%- end -%>
62
+
63
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
64
+ <%= edge "T#{token.id}", token.id, nil, weight: 10, arrowhead: :none -%>
65
+ <%- end -%>
66
+ <%- end -%>
67
+
68
+ {
69
+ rank="same";
70
+
71
+ <%- @graph.right.each do |tokens| -%>
72
+ <%- tokens.reject(&:empty_token_sort).each do |token| -%>
73
+ <%= node "T#{token.id}", token.form, shape: :none, fontcolor: :blue, tooltip: [token.lemma, token.part_of_speech, token.morphology].join("\n") -%>
74
+ <%- end -%>
75
+
76
+ <%= tokens.reject(&:empty_token_sort).map { |token| "T#{token.id}" }.join('->') -%> [style="invis"];
77
+ <%- end -%>
78
+ }
79
+
80
+ <%- @graph.alignments.each do |x, y| -%>
81
+ <%= "T#{x}" -%> -> <%= "T#{y}" -%> [color=blue,dir=none];
82
+ <%- end -%>
83
+ }