proiel-cli 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,192 +1,190 @@
1
- module PROIEL
2
- module Converter
3
- class Tiger2
4
- SCHEMA_FILE = File.join('tiger2', 'Tiger2.xsd')
5
-
6
- class << self
7
- def process(tb, options)
8
- selected_features = [] # TODO
9
- @features = selected_features.map { |f| [f, 'FREC'] }
10
-
11
- builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
12
- builder.instruct! :xml, version: "1.0", encoding: "UTF-8"
13
-
14
- tb.sources.each do |source|
15
- @hack = tb.annotation_schema
16
- write_source(builder, source, tb) do
17
- source.divs.each do |div|
18
- div.sentences.each do |sentence|
19
- write_sentence(builder, sentence)
20
- end
1
+ module PROIEL::Converter
2
+ class Tiger2
3
+ SCHEMA_FILE = File.join('tiger2', 'Tiger2.xsd')
4
+
5
+ class << self
6
+ def process(tb, _)
7
+ selected_features = [] # TODO
8
+ @features = selected_features.map { |f| [f, 'FREC'] }
9
+
10
+ builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
11
+ builder.instruct! :xml, version: '1.0', encoding: 'UTF-8'
12
+
13
+ tb.sources.each do |source|
14
+ @hack = tb.annotation_schema
15
+ write_source(builder, source, tb) do
16
+ source.divs.each do |div|
17
+ div.sentences.each do |sentence|
18
+ write_sentence(builder, sentence)
21
19
  end
22
20
  end
23
21
  end
24
22
  end
23
+ end
25
24
 
26
- def write_source(builder, s, tb)
27
- builder.corpus('xml:id' => s.id,
28
- 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
29
- 'xsi:schemaLocation' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/ http://korpling.german.hu-berlin.de/tiger2/V2.0.5/Tiger2.xsd',
30
- 'xmlns:tiger2' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/',
31
- 'xmlns' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/') do
32
- builder.head do
33
- builder.meta do
34
- builder.name(s.title)
35
- builder.author('The PROIEL project')
36
- builder.date(s.export_time.strftime("%F %T %z"))
37
- builder.description
38
- builder.format
39
- builder.history
40
- end
41
-
42
- declare_annotation(builder, @features,
43
- tb.annotation_schema)
25
+ def write_source(builder, s, tb)
26
+ builder.corpus('xml:id' => s.id,
27
+ 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
28
+ 'xsi:schemaLocation' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/ http://korpling.german.hu-berlin.de/tiger2/V2.0.5/Tiger2.xsd',
29
+ 'xmlns:tiger2' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/',
30
+ 'xmlns' => 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/') do
31
+ builder.head do
32
+ builder.meta do
33
+ builder.name(s.title)
34
+ builder.author('The PROIEL project')
35
+ builder.date(s.export_time.strftime('%F %T %z'))
36
+ builder.description
37
+ builder.format
38
+ builder.history
44
39
  end
45
40
 
46
- builder.body do
47
- yield builder
48
- end
41
+ declare_annotation(builder, @features,
42
+ tb.annotation_schema)
43
+ end
44
+
45
+ builder.body do
46
+ yield builder
49
47
  end
50
48
  end
49
+ end
51
50
 
52
- def declare_annotation(builder, features, annotation_schema)
53
- builder.annotation do
54
- features.each do |name, domain|
55
- # FIXME: we may want to list possible values for some of these
56
- builder.feature(name: name, domain: domain)
57
- end
51
+ def declare_annotation(builder, features, annotation_schema)
52
+ builder.annotation do
53
+ features.each do |name, domain|
54
+ # FIXME: we may want to list possible values for some of these
55
+ builder.feature(name: name, domain: domain)
56
+ end
58
57
 
59
- builder.edgelabel do
60
- builder.value(name: '--')
58
+ builder.edgelabel do
59
+ builder.value(name: '--')
61
60
 
62
- annotation_schema.primary_relations.each do |tag, features|
63
- builder.value({ name: tag }, features.summary)
64
- end
61
+ annotation_schema.primary_relations.each do |tag, features|
62
+ builder.value({ name: tag }, features.summary)
65
63
  end
64
+ end
66
65
 
67
- builder.secedgelabel do
68
- annotation_schema.secondary_relations.each do |tag, features|
69
- builder.value({name: tag }, features.summary)
70
- end
66
+ builder.secedgelabel do
67
+ annotation_schema.secondary_relations.each do |tag, features|
68
+ builder.value({name: tag }, features.summary)
71
69
  end
72
70
  end
73
71
  end
72
+ end
74
73
 
75
- def declare_edgelabels(builder)
76
- builder.feature(name: "label", type: "prim", domain: "edge") do
77
- declare_primary_edges(builder)
78
- end
74
+ def declare_edgelabels(builder)
75
+ builder.feature(name: 'label', type: 'prim', domain: 'edge') do
76
+ declare_primary_edges(builder)
77
+ end
79
78
 
80
- builder.feature(name: "label", type: "sec", domain: "edge") do
81
- declare_secedges(builder)
82
- end
79
+ builder.feature(name: 'label', type: 'sec', domain: 'edge') do
80
+ declare_secedges(builder)
81
+ end
83
82
 
84
- builder.feature(name: "label", type: "coref", domain: "edge") do
85
- builder.value(name: "antecedent")
86
- builder.value(name: "inference")
87
- end
83
+ builder.feature(name: 'label', type: 'coref', domain: 'edge') do
84
+ builder.value(name: 'antecedent')
85
+ builder.value(name: 'inference')
88
86
  end
87
+ end
89
88
 
90
- def write_sentence(builder, s)
91
- builder.s('xml:id' => "s#{s.id}") do
92
- builder.graph(root: "s#{s.id}_root") do
93
- write_terminals(builder, s)
94
- write_nonterminals(builder, s)
95
- end
89
+ def write_sentence(builder, s)
90
+ builder.s('xml:id' => "s#{s.id}") do
91
+ builder.graph(root: "s#{s.id}_root") do
92
+ write_terminals(builder, s)
93
+ write_nonterminals(builder, s)
96
94
  end
97
95
  end
96
+ end
98
97
 
99
- def write_terminals(builder, s)
100
- builder.terminals do
101
- s.tokens.each do |t|
102
- builder.t(token_attrs(s, t, 'T').merge({ 'xml:id' => "w#{t.id}"}))
103
- end
98
+ def write_terminals(builder, s)
99
+ builder.terminals do
100
+ s.tokens.each do |t|
101
+ builder.t(token_attrs(t, 'T').merge({ 'xml:id' => "w#{t.id}"}))
104
102
  end
105
103
  end
104
+ end
106
105
 
107
- def token_attrs(s, t, type)
108
- attrs = {}
109
-
110
- @features.each do |name, domain|
111
- if domain == 'FREC' or domain == type
112
- case name
113
- when :word, :cat
114
- attrs[name] = t.pro? ? "PRO-#{t.relation.upcase}" : t.form
115
- when *@semantic_features
116
- attrs[name] = t.sem_tags_to_hash[attr]
117
- when :lemma
118
- attrs[name] = t.lemma
119
- when :pos
120
- if t.empty_token_sort
121
- attrs[name] = t.empty_token_sort + "-"
122
- else
123
- attrs[name] = t.pos
124
- end
125
- when *MORPHOLOGICAL_FEATURES
126
- attrs[name] = name.to_s.split("_").map { |a| t.morphology_hash[a.to_sym] || '-' }.join
106
+ def token_attrs(t, type)
107
+ attrs = {}
108
+
109
+ @features.each do |name, domain|
110
+ if domain == 'FREC' or domain == type
111
+ case name
112
+ when :word, :cat
113
+ attrs[name] = t.pro? ? "PRO-#{t.relation.upcase}" : t.form
114
+ when *@semantic_features
115
+ attrs[name] = t.sem_tags_to_hash[attr]
116
+ when :lemma
117
+ attrs[name] = t.lemma
118
+ when :pos
119
+ if t.empty_token_sort
120
+ attrs[name] = t.empty_token_sort + '-'
127
121
  else
128
- if t.respond_to?(name)
129
- attrs[name] = t.send(name)
130
- else
131
- raise "Do not know how to get required attribute #{name}"
132
- end
122
+ attrs[name] = t.pos
123
+ end
124
+ when *MORPHOLOGICAL_FEATURES
125
+ attrs[name] = name.to_s.split('_').map { |a| t.morphology_hash[a.to_sym] || '-' }.join
126
+ else
127
+ if t.respond_to?(name)
128
+ attrs[name] = t.send(name)
129
+ else
130
+ raise "Do not know how to get required attribute #{name}"
133
131
  end
134
- attrs[name] ||= "--"
135
132
  end
133
+ attrs[name] ||= '--'
136
134
  end
137
-
138
- attrs
139
135
  end
140
136
 
141
- def write_nonterminals(builder, s)
142
- builder.nonterminals do
143
- # Add an empty root node
144
- h = @features.select { |_, domain| ['FREC', 'NT'].include?(domain) }.map { |name, _| [name, '--'] }.to_h
145
- h['xml:id'] = "s#{s.id}_root"
137
+ attrs
138
+ end
139
+
140
+ def write_nonterminals(builder, s)
141
+ builder.nonterminals do
142
+ # Add an empty root node
143
+ h = @features.select { |_, domain| ['FREC', 'NT'].include?(domain) }.map { |name, _| [name, '--'] }.to_h
144
+ h['xml:id'] = "s#{s.id}_root"
146
145
 
147
- builder.nt(h) do
148
- s.tokens.reject { |t| t.head or t.pro? }.each do |t|
149
- builder.edge(idref: "p#{t.id}", label: t.relation)
150
- end
146
+ builder.nt(h) do
147
+ s.tokens.reject { |t| t.head or t.pro? }.each do |t|
148
+ builder.edge(idref: "p#{t.id}", label: t.relation)
151
149
  end
150
+ end
152
151
 
153
- # Add other NTs
154
- s.tokens.each do |t|
155
- builder.nt(token_attrs(s, t, 'NT').merge('xml:id' => "p#{t.id}")) do
156
- # Add an edge to the corresponding terminal node
157
- builder.edge(idref: "w#{t.id}", label: '--')
152
+ # Add other NTs
153
+ s.tokens.each do |t|
154
+ builder.nt(token_attrs(t, 'NT').merge('xml:id' => "p#{t.id}")) do
155
+ # Add an edge to the corresponding terminal node
156
+ builder.edge(idref: "w#{t.id}", label: '--')
158
157
 
159
- # Add primary dependency edges
160
- t.children.each { |d| builder.edge(idref: "p#{d.id}", label: d.relation) }
158
+ # Add primary dependency edges
159
+ t.children.each { |d| builder.edge(idref: "p#{d.id}", label: d.relation) }
161
160
 
162
- # Add secondary dependency edges
163
- t.slashes.each do |relation, target_id|
164
- builder.secedge(idref: "p#{target_id}", label: relation)
165
- end
161
+ # Add secondary dependency edges
162
+ t.slashes.each do |relation, target_id|
163
+ builder.secedge(idref: "p#{target_id}", label: relation)
166
164
  end
167
165
  end
168
166
  end
169
167
  end
168
+ end
170
169
 
171
- def write_root_edge(t, builder)
172
- builder.edge('tiger2:type' => "prim", 'tiger2:target' => "p#{t.id}", :label => t.relation.tag)
173
- end
174
-
175
- def write_edges(t, builder)
176
- # Add an edge between this node and the corresponding terminal node unless
177
- # this is not a morphtaggable node.
178
- builder.edge('tiger2:type' => "prim", 'tiger2:target' => "w#{t.id}", :label => '--') if t.is_morphtaggable? or t.empty_token_sort == 'P'
170
+ def write_root_edge(t, builder)
171
+ builder.edge('tiger2:type' => 'prim', 'tiger2:target' => "p#{t.id}", :label => t.relation.tag)
172
+ end
179
173
 
180
- # Add primary dependency edges including empty pro tokens if we are exporting info structure as well
181
- t.dependents.each { |d| builder.edge('tiger2:type' => "prim", 'tiger2:target' => "p#{d.id}", :label => d.relation.tag) }
174
+ def write_edges(t, builder)
175
+ # Add an edge between this node and the corresponding terminal node unless
176
+ # this is not a morphtaggable node.
177
+ builder.edge('tiger2:type' => 'prim', 'tiger2:target' => "w#{t.id}", :label => '--') if t.is_morphtaggable? or t.empty_token_sort == 'P'
182
178
 
183
- # Add secondary dependency edges
184
- get_slashes(t).each do |se|
185
- builder.edge('tiger2:type' => "sec", 'tiger2:target' => "p#{se.slashee_id}", :label => se.relation.tag)
186
- end
179
+ # Add primary dependency edges including empty pro tokens if we are exporting info structure as well
180
+ t.dependents.each { |d| builder.edge('tiger2:type' => 'prim', 'tiger2:target' => "p#{d.id}", :label => d.relation.tag) }
187
181
 
188
- builder.edge('tiger2:type' => "coref", 'tiger2:target' => t.antecedent_id, :label => (t.information_status_tag == 'acc_inf' ? "inference" : "antecedent") )
182
+ # Add secondary dependency edges
183
+ get_slashes(t).each do |se|
184
+ builder.edge('tiger2:type' => 'sec', 'tiger2:target' => "p#{se.slashee_id}", :label => se.relation.tag)
189
185
  end
186
+
187
+ builder.edge('tiger2:type' => 'coref', 'tiger2:target' => t.antecedent_id, :label => (t.information_status_tag == 'acc_inf' ? 'inference' : 'antecedent'))
190
188
  end
191
189
  end
192
190
  end
@@ -1,26 +1,30 @@
1
- module PROIEL
2
- module Converter
3
- class TNT
4
- class << self
5
- def process(tb, options)
6
- tb.sources.each do |source|
7
- puts "%% Source #{source.id}"
8
- puts '--'
1
+ module PROIEL::Converter
2
+ class TNT
3
+ class << self
4
+ def process(tb, options)
5
+ tb.sources.each do |source|
6
+ puts "%% Source #{source.id}"
7
+ puts '--'
9
8
 
10
- source.divs.each do |div|
11
- div.sentences.each do |sentence|
12
- puts "%% Sentence #{sentence.id}"
13
- sentence.tokens.each do |token|
14
- unless token.form.nil?
9
+ source.divs.each do |div|
10
+ div.sentences.each do |sentence|
11
+ puts "%% Sentence #{sentence.id}"
12
+ sentence.tokens.each do |token|
13
+ if options['pos'] or options['morphology']
14
+ unless token.form.nil? or token.pos.nil?
15
15
  if options['morphology']
16
- puts [token.form, token.pos + token.morphology].join("\t")
16
+ unless token.morphology.nil?
17
+ puts [token.form, token.pos + token.morphology].join("\t")
18
+ end
17
19
  else
18
20
  puts [token.form, token.pos].join("\t")
19
21
  end
20
22
  end
23
+ else
24
+ puts token.form
21
25
  end
22
- puts '--'
23
26
  end
27
+ puts '--'
24
28
  end
25
29
  end
26
30
  end
@@ -1,5 +1,5 @@
1
1
  module PROIEL
2
2
  module CLI
3
- VERSION = '1.2.1'
3
+ VERSION = '1.3.0'
4
4
  end
5
5
  end
data/lib/proiel/cli.rb CHANGED
@@ -1,4 +1,29 @@
1
+ require 'builder'
2
+ require 'colorize'
3
+ require 'mercenary'
4
+ require 'proiel'
5
+ require 'pry'
1
6
  require 'ruby-progressbar'
2
7
 
3
8
  require 'proiel/cli/version'
4
- require 'proiel/cli/commands'
9
+
10
+ module PROIEL
11
+ class Command
12
+ class << self
13
+ def subclasses
14
+ @subclasses ||= []
15
+ end
16
+
17
+ def inherited(base)
18
+ subclasses << base
19
+ super(base)
20
+ end
21
+ end
22
+ end
23
+
24
+ module Converter; end
25
+ end
26
+
27
+ Dir[File.join(File.dirname(__FILE__), 'cli', '{commands,converters}', '*.rb')].sort.each do |f|
28
+ require f
29
+ end