mexico 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.project +14 -0
- data/.rspec +2 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +165 -0
- data/README.md +116 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/assets/TESTCORPUS/Corpus.xml +26 -0
- data/assets/fiesta/b6/layer_connectors.fst +16 -0
- data/assets/fiesta/b6/match_jones_161_CM_neu_checked.parsed.xml +1225 -0
- data/assets/fiesta/elan/ElanFileFormat.eaf +76 -0
- data/assets/fiesta/elan/ElanFileFormat.pfsx +8 -0
- data/assets/fiesta/elan/ElanFileFormatComplexInterLayerLinks.eaf +107 -0
- data/assets/fiesta/elan/reflinks_example.eaf +270 -0
- data/assets/fiesta/elan/test_all_interlayerrelations.fst +43 -0
- data/assets/fiesta/head/head.fst +24 -0
- data/assets/fiesta/praat/mexico.ShortTextGrid +81 -0
- data/assets/fiesta/praat/mexico.TextGrid +104 -0
- data/assets/helpers/collection_ref_handler.rb +29 -0
- data/assets/helpers/id_ref_handler.rb +29 -0
- data/assets/helpers/roxml_attribute_handler.rb +57 -0
- data/assets/out_only/construct_and_write_spec.toe +99 -0
- data/assets/spec.html +554 -0
- data/bin/mexico +5 -0
- data/features/mexico.feature +9 -0
- data/features/step_definitions/mexico_steps.rb +0 -0
- data/features/support/env.rb +15 -0
- data/info/releasenotes/0.0.1.md +5 -0
- data/info/releasenotes/0.0.2.md +5 -0
- data/info/releasenotes/0.0.3.md +5 -0
- data/info/releasenotes/0.0.4.md +5 -0
- data/info/releasenotes/0.0.5.md +8 -0
- data/info/releasenotes/0.0.6.md +3 -0
- data/info/releasenotes/0.0.7.md +4 -0
- data/info/releasenotes/0.0.8.md +9 -0
- data/info/releasenotes/0.0.9.md +7 -0
- data/lib/mexico/cmd.rb +210 -0
- data/lib/mexico/constants.rb +82 -0
- data/lib/mexico/constraints/constraint.rb +85 -0
- data/lib/mexico/constraints/fiesta_constraints.rb +209 -0
- data/lib/mexico/constraints.rb +27 -0
- data/lib/mexico/core/corpus_core.rb +39 -0
- data/lib/mexico/core/design_core.rb +31 -0
- data/lib/mexico/core/media_type.rb +61 -0
- data/lib/mexico/core.rb +36 -0
- data/lib/mexico/fiesta/interfaces/b6_chat_game_interface.rb +222 -0
- data/lib/mexico/fiesta/interfaces/elan_interface.rb +143 -0
- data/lib/mexico/fiesta/interfaces/short_text_grid_interface.rb +99 -0
- data/lib/mexico/fiesta/interfaces/text_grid_interface.rb +103 -0
- data/lib/mexico/fiesta/interfaces.rb +31 -0
- data/lib/mexico/fiesta.rb +27 -0
- data/lib/mexico/file_system/bound_to_corpus.rb +37 -0
- data/lib/mexico/file_system/corpus.rb +207 -0
- data/lib/mexico/file_system/data.rb +96 -0
- data/lib/mexico/file_system/design.rb +75 -0
- data/lib/mexico/file_system/design_component.rb +78 -0
- data/lib/mexico/file_system/entry.rb +59 -0
- data/lib/mexico/file_system/fiesta_document.rb +344 -0
- data/lib/mexico/file_system/fiesta_map.rb +112 -0
- data/lib/mexico/file_system/head.rb +33 -0
- data/lib/mexico/file_system/id_ref.rb +46 -0
- data/lib/mexico/file_system/implicit_item_link.rb +19 -0
- data/lib/mexico/file_system/interval_link.rb +84 -0
- data/lib/mexico/file_system/item.rb +166 -0
- data/lib/mexico/file_system/item_link.rb +98 -0
- data/lib/mexico/file_system/item_links_proxy.rb +13 -0
- data/lib/mexico/file_system/layer.rb +89 -0
- data/lib/mexico/file_system/layer_connector.rb +74 -0
- data/lib/mexico/file_system/layer_link.rb +75 -0
- data/lib/mexico/file_system/local_file.rb +101 -0
- data/lib/mexico/file_system/participant.rb +69 -0
- data/lib/mexico/file_system/participant_role.rb +43 -0
- data/lib/mexico/file_system/point_link.rb +79 -0
- data/lib/mexico/file_system/property.rb +35 -0
- data/lib/mexico/file_system/property_map.rb +37 -0
- data/lib/mexico/file_system/resource.rb +174 -0
- data/lib/mexico/file_system/scale.rb +125 -0
- data/lib/mexico/file_system/section.rb +41 -0
- data/lib/mexico/file_system/static_collection_ref.rb +64 -0
- data/lib/mexico/file_system/template.rb +23 -0
- data/lib/mexico/file_system/trial.rb +87 -0
- data/lib/mexico/file_system/url.rb +65 -0
- data/lib/mexico/file_system.rb +62 -0
- data/lib/mexico/not_yet_implemented_error.rb +28 -0
- data/lib/mexico/util/fancy_container.rb +57 -0
- data/lib/mexico/util.rb +51 -0
- data/lib/mexico.rb +36 -0
- data/spec/constraints/constraints_spec.rb +306 -0
- data/spec/core/media_type_spec.rb +50 -0
- data/spec/fiesta/b6_spec.rb +43 -0
- data/spec/fiesta/elan_spec.rb +43 -0
- data/spec/fiesta/head_spec.rb +54 -0
- data/spec/fiesta/praat_spec.rb +54 -0
- data/spec/fiesta/read_spec.rb +76 -0
- data/spec/file_system_based/better_collection_spec.rb +142 -0
- data/spec/file_system_based/corpus_spec.rb +194 -0
- data/spec/file_system_based/design_spec.rb +100 -0
- data/spec/file_system_based/inter_links_spec.rb +100 -0
- data/spec/file_system_based/item_links_spec.rb +76 -0
- data/spec/file_system_based/rdf_spec.rb +177 -0
- data/spec/file_system_based/resource_spec.rb +111 -0
- data/spec/file_system_based/trial_spec.rb +129 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/toe/construct_and_write_spec.rb +82 -0
- data/spec/toe/construction_spec.rb +110 -0
- data/spec/toe/item_spec.rb +58 -0
- data/spec/toe/layer_spec.rb +63 -0
- data/spec/toe/scale_spec.rb +89 -0
- data/spec/toe/toe_document_spec.rb +39 -0
- data/test/helper.rb +19 -0
- data/test/test_mexico.rb +7 -0
- metadata +357 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# This file is part of the MExiCo gem.
|
|
2
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
3
|
+
# http://www.sfb673.org
|
|
4
|
+
#
|
|
5
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
7
|
+
# published by the Free Software Foundation, either version 3 of
|
|
8
|
+
# the License, or (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU Lesser General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
16
|
+
# License along with MExiCo. If not, see
|
|
17
|
+
# <http://www.gnu.org/licenses/>.
|
|
18
|
+
|
|
19
|
+
# Describes one kind of media: a machine-readable identifier, a display
# name, and the list of file extensions associated with it.
class Mexico::Core::MediaType

  attr_accessor :identifier, :name, :extensions

  # Creates a media type and assigns every supported attribute that is
  # present in +opts+.
  # @param opts [Hash] optional values keyed by +:identifier+, +:name+ and
  #   +:extensions+. Attributes whose key is absent are not assigned; any
  #   other keys are ignored.
  def initialize(opts={})
    self.identifier = opts[:identifier] if opts.key?(:identifier)
    self.name       = opts[:name]       if opts.key?(:name)
    self.extensions = opts[:extensions] if opts.key?(:extensions)
  end

end
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Collects the constants that belong to the current MExiCo model.
module Mexico::Constants

  # Constants for the media types that are available in the MExiCo context.
  module MediaTypes

    # Digital recordings of moving pictures, usually along with sound.
    VIDEO = Mexico::Core::MediaType.new(
      :identifier => "video",
      :name       => "Video",
      :extensions => ["mov", "avi", "mpg", "mpeg", "m4v", "webm", "mts"]
    )

    # Digital sound recordings.
    AUDIO = Mexico::Core::MediaType.new(
      :identifier => "audio",
      :name       => "Audio",
      :extensions => ["wav", "ogg", "aac", "mp3"]
    )

    # Transcription and annotation file formats.
    ANNOTATION = Mexico::Core::MediaType.new(
      :identifier => "annotation",
      :name       => "Annotation",
      :extensions => ["toe", "ShortTextGrid", "TextGrid", "eaf"]
    )

    # Placeholder for all other (yet unsupported) types; it has no extensions.
    OTHER = Mexico::Core::MediaType.new(
      :identifier => "other",
      :name       => "Other",
      :extensions => []
    )

    # All media types currently implemented, in the order they are declared.
    ALL = [VIDEO, AUDIO, ANNOTATION, OTHER]

  end

end
|
data/lib/mexico/core.rb
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# This file is part of the MExiCo gem.
|
|
2
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
3
|
+
# http://www.sfb673.org
|
|
4
|
+
#
|
|
5
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
7
|
+
# published by the Free Software Foundation, either version 3 of
|
|
8
|
+
# the License, or (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU Lesser General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
16
|
+
# License along with MExiCo. If not, see
|
|
17
|
+
# <http://www.gnu.org/licenses/>.
|
|
18
|
+
|
|
19
|
+
# Namespace for the central methods shared by all classes of future MExiCo
# implementations. They are meant to stay the same, so that future
# implementations can simply include them.
module Mexico::Core; end
|
|
25
|
+
|
|
26
|
+
require 'mexico/core/media_type.rb'
|
|
27
|
+
|
|
28
|
+
require 'mexico/core/corpus_core.rb'
|
|
29
|
+
require 'mexico/core/design_core.rb'
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Opens the constants namespace and its media type sub-namespace without
# adding anything to either of them.
module Mexico::Constants
  module MediaTypes; end
end
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# This file is part of the MExiCo gem.
|
|
2
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
3
|
+
# http://www.sfb673.org
|
|
4
|
+
#
|
|
5
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
7
|
+
# published by the Free Software Foundation, either version 3 of
|
|
8
|
+
# the License, or (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU Lesser General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
16
|
+
# License along with MExiCo. If not, see
|
|
17
|
+
# <http://www.gnu.org/licenses/>.
|
|
18
|
+
|
|
19
|
+
# Import and export interface for the chat game file format by
|
|
20
|
+
# project B6.
|
|
21
|
+
|
|
22
|
+
require 'singleton'
|
|
23
|
+
|
|
24
|
+
class Mexico::Fiesta::Interfaces::B6ChatGameInterface

  include Singleton
  include Mexico::FileSystem

  # Imports a B6 chat game document by reading contents from the given IO object.
  #
  # The children of every +/match/round+ element are processed by tag name:
  # - +move+ elements become items on the 'moves' layer. Each one is linked to
  #   the timeline (attribute +time+) and to the two spatial scales
  #   (attribute +to+, read as "x,y").
  # - +chat+ elements become items on the 'chats' layer whose string value is
  #   the +message+ attribute.
  # - +annotation+ elements contribute one item per +sentence+ child (linked
  #   to the chat item read most recently), one item for that sentence's
  #   +parsetree+ child, and a tree of phrase items below the parse tree.
  # Children with any other tag name are ignored.
  #
  # NOTE(review): the identifiers of the point and layer links of moves and
  # chats ("move-N-t", "chat-N-layer", ...) contain the element counter only.
  # That counter restarts in every round, so the same link identifier is
  # produced again in later rounds. Confirm whether link identifiers have to
  # be unique within a document.
  #
  # @param io [IO] The IO object to read from.
  # @return [FiestaDocument] on success, the corresponding FiESTA document.
  def import(io=$stdin)
    doc = FiestaDocument.new

    timeline = doc.add_standard_timeline('s')
    scale_x = Scale.new(identifier: 'spatial_x', name: 'Spatial coordinate X', unit: 'pixel', document: doc)
    scale_y = Scale.new(identifier: 'spatial_y', name: 'Spatial coordinate Y', unit: 'pixel', document: doc)
    doc.scales << scale_x
    doc.scales << scale_y

    chats_layer    = Layer.new(identifier: 'chats', name: 'Chats', document: doc)
    moves_layer    = Layer.new(identifier: 'moves', name: 'Moves', document: doc)
    sentence_layer = Layer.new(identifier: 'sentences', name: 'Sentences', document: doc)
    tree_layer     = Layer.new(identifier: 'parsedTrees', name: 'Parsed Trees', document: doc)
    phrase_layer   = Layer.new(identifier: 'parsedPhrases', name: 'Parsed Phrases', document: doc)

    # Not imported yet: word / correction pairs, forms, and colors.

    [chats_layer, moves_layer, sentence_layer, tree_layer, phrase_layer].each do |layer|
      doc.layers << layer
    end

    # B6 data is available in XML documents, so we read
    # those into a Nokogiri object.
    xml_document = ::Nokogiri::XML(io)

    # The chat item read most recently; sentence annotations refer to it.
    last_chat_item = nil

    xml_document.xpath('/match/round').each_with_index do |round, round_index|
      round_no = round_index + 1

      round.xpath('./*').each_with_index do |action, action_index|
        el_no = action_index + 1

        case action.name
        when 'move'
          build_move_item(doc, action, round_no, el_no, timeline, scale_x, scale_y, moves_layer)
        when 'chat'
          last_chat_item = build_chat_item(doc, action, round_no, el_no, timeline, chats_layer)
        when 'annotation'
          # An annotation element may contain:
          # - ./spelling (word/correction pairs), ./forms, ./colors: not imported
          # - ./sentence with @value, @type, @lok, @no
          #   - ./parsetree with @tiefe, @verzweigung, @hoeflichkeit
          action.xpath('./sentence').each do |sentence|
            import_sentence(doc, sentence, last_chat_item, sentence_layer, tree_layer, phrase_layer)
          end
        end
      end
    end

    doc
  end

  # Attempts to export the given FiESTA document to the B6 chat game format.
  # Currently, this does not work since the B6 format is too specialised:
  # the method has no body, writes nothing to +io+ and returns nil.
  # @param doc [FiestaDocument] The document to export (unused).
  # @param io [IO] The IO object to write to (unused).
  # @return [nil]
  def export(doc, io=$stdout)

  end

  # A recursive method that converts phrase structures with variable depth into a linked FiESTA annotation structure.
  #
  # Every child element of +node+ yields one item named after the element's
  # tag. The item's identifier is the parent's identifier followed by the
  # 1-based position of the element among its siblings. If the first child
  # node of the element is a text node, a second item with the suffix "-val"
  # is added after the element's own descendants; it carries the element's
  # text and has the element's item as parent.
  #
  # @param fdoc [FiestaDocument] The FiESTA document to which all items shall be added.
  # @param parent_item [Item] The parent item object to which children shall be added.
  # @param layer [Layer] The FiESTA layer that shall contain all annotations.
  # @param node [Node] The XML node that contains the phrase structures to be parsed.
  # @return nil
  def convert_phrases(fdoc, parent_item, layer, node)
    node.xpath('./*').each_with_index do |phrase, index|
      next unless phrase.element?

      position = index + 1

      phrase_item = Mexico::FileSystem::Item.new identifier: "#{parent_item.identifier}-#{position}", document: fdoc
      phrase_item.item_links << ItemLink.new(identifier: "#{phrase_item.identifier}-il", target_object: parent_item, role: 'parent', document: fdoc)
      phrase_item.layer_links << LayerLink.new(identifier: "#{phrase_item.identifier}-to-layer", target_object: layer, document: fdoc)
      phrase_item.data = Data.new string_value: phrase.name, item: phrase_item, document: fdoc
      fdoc.items << phrase_item

      convert_phrases fdoc, phrase_item, layer, phrase

      # An empty element has no children at all, so the first child has to
      # be checked for nil before it is asked whether it is a text node.
      first_child = phrase.children.first
      next unless first_child && first_child.text?

      value_item = Mexico::FileSystem::Item.new identifier: "#{parent_item.identifier}-#{position}-val", document: fdoc
      # The links are named after the value item itself, so that they do not
      # repeat the identifiers of the phrase item's links.
      value_item.item_links << ItemLink.new(identifier: "#{value_item.identifier}-il", target_object: phrase_item, role: 'parent', document: fdoc)
      value_item.layer_links << LayerLink.new(identifier: "#{value_item.identifier}-to-layer", target_object: layer, document: fdoc)
      value_item.data = Data.new string_value: phrase.text, item: value_item, document: fdoc
      fdoc.items << value_item
    end

    nil
  end

  private

  # Reads the +time+ attribute of an action element as an integer; a leading
  # "+" sign is removed first.
  def action_time(action)
    action['time'].gsub(/^\+/, '').to_i
  end

  # Creates the item for a +move+ element, links it to the timeline, both
  # spatial scales and the given layer, and appends it to the document.
  # @return [Item] the new item.
  def build_move_item(doc, action, round_no, el_no, timeline, scale_x, scale_y, layer)
    item = Item.new(identifier: "round-#{round_no}-move-#{el_no}", document: doc)
    item.point_links << PointLink.new(identifier: "move-#{el_no}-t", point: action_time(action), target_object: timeline, document: doc)

    # The 'to' attribute is split at the comma into the x and y values.
    to_x, to_y = action['to'].split(",").map(&:to_i)
    item.point_links << PointLink.new(identifier: "move-#{el_no}-x", point: to_x, target_object: scale_x, document: doc)
    item.point_links << PointLink.new(identifier: "move-#{el_no}-y", point: to_y, target_object: scale_y, document: doc)

    item.data = Data.new(item: item, document: doc)
    item.layer_links << LayerLink.new(identifier: "move-#{el_no}-layer", target_object: layer, document: doc)
    doc.items << item
    item
  end

  # Creates the item for a +chat+ element, links it to the timeline and the
  # given layer, and appends it to the document.
  # @return [Item] the new item.
  def build_chat_item(doc, action, round_no, el_no, timeline, layer)
    item = Item.new(identifier: "round-#{round_no}-chat-#{el_no}", document: doc)
    item.point_links << PointLink.new(identifier: "chat-#{el_no}-t", point: action_time(action), target_object: timeline, document: doc)
    item.data = Data.new(:string_value => action['message'], item: item, document: doc)
    item.layer_links << LayerLink.new(identifier: "chat-#{el_no}-layer", target_object: layer, document: doc)
    doc.items << item
    item
  end

  # Imports one +sentence+ element: the sentence item itself, the item for
  # its first +parsetree+ child, and the phrase items below the parse tree.
  # A sentence without a +parsetree+ child contributes the sentence item only.
  # +chat_item+ is nil if no chat element has been read before the sentence.
  # @return [nil]
  def import_sentence(doc, sentence, chat_item, sentence_layer, tree_layer, phrase_layer)
    s_id = sentence['id']

    sent_item = Item.new identifier: s_id, document: doc
    sent_item.item_links << ItemLink.new(identifier: "#{s_id}-to-chat", target_object: chat_item, role: 'parent', document: doc)
    sent_item.layer_links << LayerLink.new(identifier: "#{s_id}-to-layer", target_object: sentence_layer, document: doc)
    sent_item.data = Data.new(
      map: Mexico::FileSystem::FiestaMap.new({
        value: sentence['value'],
        type: sentence['type'],
        lok: sentence['lok'],
        no: sentence['no']}),
      item: sent_item, document: doc)
    doc.items << sent_item

    parsetree_elem = sentence.xpath('./parsetree').first
    return nil if parsetree_elem.nil?

    pt_id = parsetree_elem['id']
    parsetree_item = Item.new identifier: pt_id, document: doc
    parsetree_item.item_links << ItemLink.new(identifier: "#{pt_id}-to-sentence", target_object: sent_item, role: 'parent', document: doc)
    parsetree_item.layer_links << LayerLink.new(identifier: "#{pt_id}-to-layer", target_object: tree_layer, document: doc)
    parsetree_item.data = Data.new(
      map: Mexico::FileSystem::FiestaMap.new({
        tiefe: parsetree_elem['tiefe'],
        verzweigung: parsetree_elem['verzweigung'],
        hoeflichkeit: parsetree_elem['hoeflichkeit']}),
      item: parsetree_item, document: doc)
    doc.items << parsetree_item

    convert_phrases doc, parsetree_item, phrase_layer, parsetree_elem
  end

end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# This file is part of the MExiCo gem.
|
|
2
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
3
|
+
# http://www.sfb673.org
|
|
4
|
+
#
|
|
5
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
7
|
+
# published by the Free Software Foundation, either version 3 of
|
|
8
|
+
# the License, or (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU Lesser General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
16
|
+
# License along with MExiCo. If not, see
|
|
17
|
+
# <http://www.gnu.org/licenses/>.
|
|
18
|
+
|
|
19
|
+
# Import and export interface for the ELAN annotation file format
|
|
20
|
+
# (EAF).
|
|
21
|
+
|
|
22
|
+
require 'singleton'
|
|
23
|
+
|
|
24
|
+
class Mexico::Fiesta::Interfaces::ElanInterface

  include Singleton
  include Mexico::FileSystem

  # Class-level shortcut that forwards to the singleton instance's #import.
  # @param io [IO] The IO object to read from.
  # @param params [Hash] passed on unchanged.
  # @return [FiestaDocument] the imported document.
  def self.import(io=$stdin, params = {})
    instance.import(io, params)
  end

  # Class-level shortcut that forwards to the singleton instance's #export.
  # @param doc [FiestaDocument] The document to export.
  # @param io [IO] The IO object to write to.
  # @param params [Hash] passed on unchanged.
  def self.export(doc, io=$stdout, params = {})
    instance.export(doc, io, params)
  end

  # Imports an ELAN document by reading XML from the given IO object.
  #
  # Every TIER element becomes a layer named after its TIER_ID. If the tier
  # has a PARENT_REF attribute and a layer with that identifier is already
  # part of the document, a 'PARENT_CHILD' layer connector is added from the
  # parent layer to the new one. Every child element of a tier's ANNOTATION
  # containers becomes an item on that layer:
  # - an ALIGNABLE_ANNOTATION with a non-blank value gets an interval link to
  #   the timeline, built from the time slots it references,
  # - a REF_ANNOTATION gets an item link (role parent) to the item whose
  #   identifier equals its ANNOTATION_REF.
  #
  # NOTE(review): TIME_VALUE is copied to the interval bounds unchanged while
  # the timeline is created with 's'. If the time values of the input are in
  # milliseconds, a conversion is missing here. Confirm against the file
  # format and against add_standard_timeline.
  #
  # @param io [IO] The IO object to read from.
  # @param params [Hash] currently unused; an +:encoding+ entry has no effect.
  # @return [FiestaDocument] the imported document.
  def import(io=$stdin, params = {})
    xmldoc = ::Nokogiri::XML(io)

    document = Mexico::FileSystem::FiestaDocument.new

    # 1. create a standard timeline
    timeline = document.add_standard_timeline('s')

    # 2. collect the time slots: slot id => time value
    timeslots = {}
    xmldoc.xpath("//TIME_ORDER/TIME_SLOT").each do |slot|
      timeslots[slot["TIME_SLOT_ID"]] = slot["TIME_VALUE"].to_i
    end

    # 3. one layer per tier, one item per annotation
    xmldoc.xpath("//TIER").each do |tier|

      # @todo (DEFAULT_LOCALE="en") (LINGUISTIC_TYPE_REF="default-lt")
      tier_id = tier["TIER_ID"]

      layer = Mexico::FileSystem::Layer.new(identifier: tier_id,
                                            name: tier_id,
                                            document: document)
      document.layers << layer

      if tier.attributes.has_key?('PARENT_REF')
        # Only layers that were read before this tier can be found here.
        parent_layer = document.get_layer_by_id(tier['PARENT_REF'])
        if parent_layer
          layer_connector = Mexico::FileSystem::LayerConnector.new parent_layer, layer, {
            identifier: "#{parent_layer.identifier}_TO_#{layer.identifier}",
            role: 'PARENT_CHILD',
            document: document
          }
          document.add_layer_connector(layer_connector)
        end
      end

      tier.xpath("./ANNOTATION").each do |container|
        container.xpath("child::*").each do |anno|
          # A missing value node yields nil here, which to_s turns into "".
          anno_val = anno.xpath("./ANNOTATION_VALUE/text()").first.to_s
          item = document.add_item identifier: anno["ANNOTATION_ID"]

          if anno.name == "ALIGNABLE_ANNOTATION" && anno_val.strip != ""
            item.add_interval_link Mexico::FileSystem::IntervalLink.new(identifier: "#{item.identifier}-int",
                                                                        min: timeslots[anno["TIME_SLOT_REF1"]].to_f,
                                                                        max: timeslots[anno["TIME_SLOT_REF2"]].to_f,
                                                                        target_object: timeline)
          end

          if anno.name == "REF_ANNOTATION"
            item.add_item_link Mexico::FileSystem::ItemLink.new(identifier: "#{item.identifier}-itm",
                                                                target_object: document.items({identifier: anno["ANNOTATION_REF"]}).first,
                                                                role: Mexico::FileSystem::ItemLink::ROLE_PARENT)
          end

          item.add_layer_link Mexico::FileSystem::LayerLink.new(identifier: "#{item.identifier}-lay",
                                                                target_object: layer)
          item.data = Mexico::FileSystem::Data.new(string_value: anno_val)
        end
      end

    end

    document
  end

  # Placeholder for the export to the ELAN format. The method has no body:
  # it writes nothing to +io+ and returns nil.
  # @param doc [FiestaDocument] The document to export (unused).
  # @param io [IO] The IO object to write to (unused).
  # @param params [Hash] unused.
  # @return [nil]
  def export(doc, io=$stdout, params = {})

  end

end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
# This file is part of the MExiCo gem.
|
|
3
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
4
|
+
# http://www.sfb673.org
|
|
5
|
+
#
|
|
6
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
7
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
8
|
+
# published by the Free Software Foundation, either version 3 of
|
|
9
|
+
# the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
# GNU Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with MExiCo. If not, see
|
|
18
|
+
# <http://www.gnu.org/licenses/>.
|
|
19
|
+
|
|
20
|
+
# Singleton is mixed in below, so the library is required here instead of
# relying on another file having loaded it first.
require 'singleton'

# Import and export interface for Praat's short text grid format.
class Mexico::Fiesta::Interfaces::ShortTextGridInterface

  include Singleton
  include Mexico::FileSystem

  # Class-level shortcut that forwards to the singleton instance's #import.
  # @param io [IO] The IO object to read from.
  # @param params [Hash] passed on unchanged.
  # @return [FiestaDocument] the imported document.
  def self.import(io=$stdin, params = {})
    instance.import(io, params)
  end

  # Class-level shortcut that forwards to the singleton instance's #export.
  # @param doc [FiestaDocument] The document to export.
  # @param io [IO] The IO object to write to.
  # @param params [Hash] passed on unchanged.
  def self.export(doc, io=$stdout, params = {})
    instance.export(doc, io, params)
  end

  # Imports a short text grid by reading it line by line from +io+.
  #
  # The reader expects six header lines (file type, object class, a blank
  # line, global minimum, global maximum, the "exists" flag), followed by
  # the number of tiers. Each tier consists of its type, its quoted name,
  # its minimum, its maximum and the number of annotations, followed by
  # three lines per annotation: minimum, maximum and quoted value.
  #
  # Every tier becomes a layer. Every annotation with a non-blank value
  # becomes an item with the identifier "l<tier>a<annotation>" (both
  # counted from 0), an interval link to the timeline and a layer link.
  # An empty stream yields a document without layers.
  #
  # @param io [IO] The IO object to read from.
  # @param params [Hash] currently unused; an +:encoding+ entry has no effect.
  # @return [FiestaDocument] the imported document.
  def import(io=$stdin, params = {})
    fdoc = FiestaDocument.new
    timeline = fdoc.add_standard_timeline('s')

    # Header values are not used, but their lines have to be consumed:
    # file type, object class, blank line, global min, global max, <exists>.
    6.times { io.gets }

    # get the number of tiers in this document.
    tier_count = io.gets.to_i

    tier_count.times do |tier_index|
      io.gets # tier type
      tier_name = Mexico::Util::strip_quotes(io.gets.strip)
      io.gets # tier minimum
      io.gets # tier maximum

      # create layer object from that tier
      layer = fdoc.add_layer({identifier:tier_name, name:tier_name})

      annotation_count = io.gets.to_i

      annotation_count.times do |annotation_index|
        anno_min = io.gets.to_f
        anno_max = io.gets.to_f
        anno_val = io.gets.strip.gsub(/^"/, "").gsub(/"$/, "")

        # annotations without text are not imported
        next if anno_val.strip == ""

        fdoc.add_item({identifier:"l#{tier_index}a#{annotation_index}"}) do |i|
          i.add_interval_link IntervalLink.new(
                                  identifier:"#{i.identifier}-il",
                                  min: anno_min,
                                  max: anno_max,
                                  target_object: timeline )
          i.data = Mexico::FileSystem::Data.new(string_value: anno_val)
          i.add_layer_link LayerLink.new(
                               identifier:"#{i.identifier}-ll",
                               target_object: layer )
        end
      end
    end

    fdoc
  end

  # Placeholder for the export to the short text grid format, shaped like
  # the empty #export of ElanInterface. Without it, the class-level export
  # fails with a NoMethodError. The method has no body: it writes nothing
  # to +io+ and returns nil.
  # @param doc [FiestaDocument] The document to export (unused).
  # @param io [IO] The IO object to write to (unused).
  # @param params [Hash] unused.
  # @return [nil]
  def export(doc, io=$stdout, params = {})

  end

end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
# This file is part of the MExiCo gem.
|
|
3
|
+
# Copyright (c) 2012-2014 Peter Menke, SFB 673, Universität Bielefeld
|
|
4
|
+
# http://www.sfb673.org
|
|
5
|
+
#
|
|
6
|
+
# MExiCo is free software: you can redistribute it and/or modify
|
|
7
|
+
# it under the terms of the GNU Lesser General Public License as
|
|
8
|
+
# published by the Free Software Foundation, either version 3 of
|
|
9
|
+
# the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# MExiCo is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
# GNU Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with MExiCo. If not, see
|
|
18
|
+
# <http://www.gnu.org/licenses/>.
|
|
19
|
+
|
|
20
|
+
# Singleton is mixed in below, so the library is required here instead of
# relying on another file having loaded it first.
require 'singleton'

# Import and export interface for Praat's text grid format.
class Mexico::Fiesta::Interfaces::TextGridInterface

  include Singleton
  include Mexico::FileSystem

  # Class-level shortcut that forwards to the singleton instance's #import.
  # @param io [IO] The IO object to read from.
  # @param params [Hash] passed on unchanged.
  # @return [FiestaDocument] the imported document.
  def self.import(io=$stdin, params = {})
    instance.import(io, params)
  end

  # Class-level shortcut that forwards to the singleton instance's #export.
  # @param doc [FiestaDocument] The document to export.
  # @param io [IO] The IO object to write to.
  # @param params [Hash] passed on unchanged.
  def self.export(doc, io=$stdout, params = {})
    instance.export(doc, io, params)
  end

  # Imports a text grid by reading it line by line from +io+.
  #
  # The reader expects six header lines (file type, object class, a blank
  # line, global minimum, global maximum, the "exists" flag), a line holding
  # the number of tiers, and the header of the item container. Each tier
  # consists of an item header, its class, its quoted name, its minimum, its
  # maximum and a "size = N" line, followed by four lines per annotation:
  # a header, minimum, maximum and quoted value.
  #
  # Every tier becomes a layer. Every annotation with a non-blank value
  # becomes an item with the identifier "l<tier>a<annotation>" (both
  # counted from 1), an interval link to the timeline and a layer link.
  # If the tier count line is missing or contains no number, the document
  # is returned without layers.
  #
  # @param io [IO] The IO object to read from.
  # @param params [Hash] currently unused; an +:encoding+ entry has no effect.
  # @return [FiestaDocument] the imported document.
  def import(io=$stdin, params = {})
    fdoc = FiestaDocument.new
    timeline = fdoc.add_standard_timeline('s')

    # Header values are not used, but their lines have to be consumed:
    # file type, object class, blank line, global min, global max, <exists>.
    6.times { io.gets }

    # The tier count is the first number on the size line; without a number
    # there is nothing to iterate over.
    size_match = io.gets.to_s.match(/\d+/)
    tier_count = size_match ? size_match[0].to_i : 0

    io.gets # item container header

    1.upto(tier_count) do |tier_num|
      io.gets # single item header
      io.gets # tier class
      tier_name = Mexico::Util::strip_quotes(io.gets.match(/"(.*)"/)[1])
      io.gets # tier minimum
      io.gets # tier maximum
      tier_size = io.gets.match(/size\s*=\s*(\d+)/)[1].to_i

      # create layer object from that tier
      layer = fdoc.add_layer({identifier:tier_name, name:tier_name})

      1.upto(tier_size) do |anno_num|
        io.gets # annotation header
        anno_min = io.gets.match(/(\d+(\.\d+)?)/)[1].to_f
        anno_max = io.gets.match(/(\d+(\.\d+)?)/)[1].to_f
        anno_val = io.gets.match(/"(.*)"/)[1]

        # annotations without text are not imported
        next if anno_val.strip == ""

        fdoc.add_item({identifier:"l#{tier_num}a#{anno_num}"}) do |i|
          i.add_interval_link IntervalLink.new(
                                  identifier:"#{i.identifier}-il",
                                  min: anno_min,
                                  max: anno_max,
                                  target_object: timeline )
          i.data = Mexico::FileSystem::Data.new(string_value: anno_val)
          i.add_layer_link LayerLink.new(
                               identifier:"#{i.identifier}-ll",
                               target_object: layer )
        end
      end
    end

    fdoc
  end

  # Placeholder for the export to the text grid format, shaped like the
  # empty #export of ElanInterface. Without it, the class-level export
  # fails with a NoMethodError. The method has no body: it writes nothing
  # to +io+ and returns nil.
  # @param doc [FiestaDocument] The document to export (unused).
  # @param io [IO] The IO object to write to (unused).
  # @param params [Hash] unused.
  # @return [nil]
  def export(doc, io=$stdout, params = {})

  end

end
|