proiel-cli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/proiel/cli/converters/proielxml.rb +1 -1
- data/lib/proiel/cli/version.rb +1 -1
- metadata +2 -7
- data/examples/decision-tree.rb +0 -41
- data/examples/dep-pos-cooccurrences.rb +0 -84
- data/examples/lint-rules.rb +0 -174
- data/examples/relation-as-disambiguator.rb +0 -134
- data/examples/word-occurrences.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bfb4db333ae791d4171490c7b3d316364b205729
|
4
|
+
data.tar.gz: fd001265bdcc8e75e49fc1e2a188f7c63471bb62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29cb26e28f22486db097b982d7e2cd4783f2b38d8d6c13fb06309fb25377791cd8248c88627b7d5b72c3adfe0e958c9696b2508fcb975e9a8fdf516a9825963b
|
7
|
+
data.tar.gz: b565292467456b10415039f0464bdfafcae5fecf55747dd3b9ec2473313ba08789d49431ecb06d649596bfd98031640481efccd9b5be58b6b565d13dcc3a1161
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# PROIEL command-line interface
|
2
2
|
|
3
|
-
This is a command-line interface for
|
3
|
+
This is a command-line interface for manipulating PROIEL treebanks.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
Install as
|
7
|
+
This library requires Ruby >= 2.1. Install as
|
8
8
|
|
9
9
|
```shell
|
10
10
|
gem install proiel-cli
|
data/lib/proiel/cli/converters/proielxml.rb
CHANGED
@@ -88,7 +88,7 @@ module PROIEL
|
|
88
88
|
unless token.slashes.empty? or options['remove-syntax'] # this extra test avoids <token></token> style XML
|
89
89
|
builder.token(attrs) do
|
90
90
|
token.slashes.each do |relation, target_id|
|
91
|
-
builder.slash("target-id"
|
91
|
+
builder.slash(:"target-id" => target_id, relation: relation)
|
92
92
|
end
|
93
93
|
end
|
94
94
|
else
|
data/lib/proiel/cli/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proiel-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marius L. Jøhndal
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: builder
|
@@ -197,11 +197,6 @@ files:
|
|
197
197
|
- contrib/proiel-maltparser-parse
|
198
198
|
- contrib/proiel-maltparser-train
|
199
199
|
- contrib/proiel-tnt-train
|
200
|
-
- examples/decision-tree.rb
|
201
|
-
- examples/dep-pos-cooccurrences.rb
|
202
|
-
- examples/lint-rules.rb
|
203
|
-
- examples/relation-as-disambiguator.rb
|
204
|
-
- examples/word-occurrences.rb
|
205
200
|
- lib/proiel/cli.rb
|
206
201
|
- lib/proiel/cli/commands.rb
|
207
202
|
- lib/proiel/cli/commands/convert.rb
|
data/examples/decision-tree.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Train a decision tree on the (head_relation, head_pos, head_lemma,
|
4
|
-
# child_relation, child_pos, child_lemma) and then predict the child_relation
|
5
|
-
# of an unknown child.
|
6
|
-
#
|
7
|
-
require 'colorize'
|
8
|
-
require 'decisiontree'
|
9
|
-
require 'proiel'
|
10
|
-
|
11
|
-
if ARGV.length < 1
|
12
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
13
|
-
exit 1
|
14
|
-
end
|
15
|
-
|
16
|
-
tb = PROIEL::Treebank.new
|
17
|
-
tb.load_from_xml(ARGV)
|
18
|
-
|
19
|
-
tokens = {}
|
20
|
-
|
21
|
-
tb.sources.each do |source|
|
22
|
-
source.tokens.each do |token|
|
23
|
-
tokens[token.id.to_i] = [token.relation, token.part_of_speech, token.lemma, token.head_id]
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
training_data = tokens.map do |_, (child_relation, child_pos, child_lemma, head_id)|
|
28
|
-
if head_id
|
29
|
-
head = tokens[head_id.to_i]
|
30
|
-
head_relation, head_pos, head_lemma, _ = *head
|
31
|
-
|
32
|
-
[head_pos || '', head_lemma || '', head_relation, child_pos || '', child_lemma || '', child_relation]
|
33
|
-
end
|
34
|
-
end.compact
|
35
|
-
|
36
|
-
attributes = %w(head_pos head_lemma head_relation child_pos child_lemma)
|
37
|
-
dr = DecisionTree::ID3Tree.new(attributes, training_data, 'pred', :discrete)
|
38
|
-
dr.train
|
39
|
-
dr.save_to_file("dr.marshal")
|
40
|
-
|
41
|
-
p dr.predict(["Ne", "Gallia", "sub", "Px", "omnis"])
|
data/examples/dep-pos-cooccurrences.rb
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'proiel'
|
3
|
-
require 'colorize'
|
4
|
-
require 'terminal-table'
|
5
|
-
|
6
|
-
if ARGV.length < 1
|
7
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
8
|
-
exit 1
|
9
|
-
end
|
10
|
-
|
11
|
-
tb = PROIEL::Treebank.new
|
12
|
-
tb.load_from_xml(ARGV)
|
13
|
-
|
14
|
-
# Present by POS
|
15
|
-
relations = tb.annotation.relation_tags.keys
|
16
|
-
|
17
|
-
c = {}
|
18
|
-
tb.sources.each do |s|
|
19
|
-
s.tokens.each do |t|
|
20
|
-
next if t.pos.nil? or t.relation.nil?
|
21
|
-
|
22
|
-
c[t.pos] ||= {}
|
23
|
-
c[t.pos][t.relation] ||= 0
|
24
|
-
c[t.pos][t.relation] += 1
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
rows = []
|
29
|
-
c.sort_by(&:first).each do |pos, d|
|
30
|
-
total = d.inject(0) { |a, (k, v)| a + v }
|
31
|
-
|
32
|
-
rows << [pos] + relations.map do |r|
|
33
|
-
n = d[r ? r.to_s : nil]
|
34
|
-
|
35
|
-
if n and n < total * 0.001
|
36
|
-
n.to_s.red
|
37
|
-
elsif n and n > total * 0.999
|
38
|
-
n.to_s.green
|
39
|
-
else
|
40
|
-
n
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
table = Terminal::Table.new headings: ['Part of speech'] + relations, rows: rows
|
46
|
-
puts table
|
47
|
-
puts "(red = relation occurs for less than 0.1% of tokens with this POS; green = relation occurs for more than 99.9% of tokens with this POS)"
|
48
|
-
puts
|
49
|
-
|
50
|
-
# Present by relation
|
51
|
-
poses = tb.annotation.part_of_speech_tags.keys
|
52
|
-
|
53
|
-
c = {}
|
54
|
-
|
55
|
-
tb.sources.each do |s|
|
56
|
-
s.tokens.each do |t|
|
57
|
-
next if t.pos.nil? or t.relation.nil?
|
58
|
-
|
59
|
-
c[t.relation] ||= {}
|
60
|
-
c[t.relation][t.pos] ||= 0
|
61
|
-
c[t.relation][t.pos] += 1
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
rows = []
|
66
|
-
c.sort_by(&:first).each do |relation, d|
|
67
|
-
total = d.inject(0) { |a, (k, v)| a + v }
|
68
|
-
|
69
|
-
rows << [relation] + poses.map do |r|
|
70
|
-
n = d[r ? r.to_s : nil]
|
71
|
-
|
72
|
-
if n and n < total * 0.001
|
73
|
-
n.to_s.red
|
74
|
-
elsif n and n > total * 0.999
|
75
|
-
n.to_s.green
|
76
|
-
else
|
77
|
-
n
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
table = Terminal::Table.new headings: ['Relation'] + poses, rows: rows
|
83
|
-
puts table
|
84
|
-
puts "(red = POS occurs for less than 0.1% of tokens with this relation; green = POS occurs for more than 99.9% of tokens with this relation)"
|
data/examples/lint-rules.rb
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Very simple testing of implicational feature rules. Example rules only
|
4
|
-
# apply to Latin.
|
5
|
-
#
|
6
|
-
require 'colorize'
|
7
|
-
require 'proiel'
|
8
|
-
|
9
|
-
VIOLATIONS = {}
|
10
|
-
|
11
|
-
def report_violation(token, message)
|
12
|
-
VIOLATIONS[message] ||= []
|
13
|
-
VIOLATIONS[message] << token
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_token(token, rules, dependent_rules)
|
17
|
-
rules.each do |match_features, test_alternatives|
|
18
|
-
f = token.features + ["\"#{token.form}\""]
|
19
|
-
|
20
|
-
if (match_features - f).empty?
|
21
|
-
unless test_alternatives.any? { |test_alternative| token.features.include?(test_alternative) }
|
22
|
-
report_violation(token, "#{match_features.join(' ')}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
dependent_rules.each do |match_features, test_alternatives|
|
28
|
-
f = token.features + ["\"#{token.form}\""]
|
29
|
-
|
30
|
-
if (match_features - f).empty?
|
31
|
-
t = token.children.all? do |dependent|
|
32
|
-
test_alternatives.any? { |test_alternative| dependent.features.include?(test_alternative) }
|
33
|
-
end
|
34
|
-
|
35
|
-
unless t
|
36
|
-
report_violation(token, "#{match_features.join(' ')} → dependents()")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def load_rules
|
43
|
-
rules = {}
|
44
|
-
dependent_rules = {}
|
45
|
-
|
46
|
-
DATA.each do |rule|
|
47
|
-
rule.chomp!
|
48
|
-
rule.sub!(/\s*#.*$/, '')
|
49
|
-
|
50
|
-
next if rule.empty?
|
51
|
-
|
52
|
-
match_features, test = rule.split(/\s*→\s*/)
|
53
|
-
match_features = match_features.split(/\s+/)
|
54
|
-
|
55
|
-
if test[/\s*dependents\(([^)]*)\)\s*/]
|
56
|
-
dependent_rules[match_features] ||= []
|
57
|
-
dependent_rules[match_features] << $1
|
58
|
-
test.sub!(/\s*dependents\([^)]*\)\s*/, '')
|
59
|
-
end
|
60
|
-
|
61
|
-
if test != ''
|
62
|
-
rules[match_features] ||= []
|
63
|
-
rules[match_features] << test
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
[rules, dependent_rules]
|
68
|
-
end
|
69
|
-
|
70
|
-
if ARGV.length < 1
|
71
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
72
|
-
exit 1
|
73
|
-
end
|
74
|
-
|
75
|
-
tb = PROIEL::Treebank.new
|
76
|
-
tb.load_from_xml(ARGV)
|
77
|
-
|
78
|
-
rules, dependent_rules = load_rules
|
79
|
-
|
80
|
-
tb.sources.each do |source|
|
81
|
-
source.sentences.each do |sentence|
|
82
|
-
if sentence.status == 'reviewed'
|
83
|
-
sentence.tokens.each do |token|
|
84
|
-
test_token(token, rules, dependent_rules)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
base_url = 'http://foni.uio.no:3000'
|
91
|
-
|
92
|
-
puts "<h1>PROIEL lint report</h1>"
|
93
|
-
|
94
|
-
VIOLATIONS.each do |rule, tokens|
|
95
|
-
puts "<h2>#{rule}</h2><ul>"
|
96
|
-
tokens.each do |token|
|
97
|
-
puts "<li>Token <a href='#{base_url}/tokens/#{token.id}'>#{token.id}</a> in sentence <a href='#{base_url}/sentences/#{token.sentence.id}'>#{token.sentence.id}</a></li>"
|
98
|
-
end
|
99
|
-
puts "</ul>"
|
100
|
-
end
|
101
|
-
|
102
|
-
__END__
|
103
|
-
|
104
|
-
# Gerundives
|
105
|
-
gdv nom → xobj # modal gerundive heading a main clause
|
106
|
-
|
107
|
-
gdv acc → comp # modal gerundive heading an AcI, or in the _curo faciendum_ type
|
108
|
-
gdv acc → xobj # modal gerundive heading an AcI with an overt auxiliary
|
109
|
-
gdv acc → obl # as argument of a preposition
|
110
|
-
gdv acc → xadv # in the _do librum legendum_ type
|
111
|
-
|
112
|
-
gdv gen → atr # in the _tempus dicendi_ type
|
113
|
-
gdv gen → narg # in the _facultas dicendi_ type
|
114
|
-
|
115
|
-
gdv abl → obl # as argument of a preposition
|
116
|
-
gdv abl → abl # in circumstantial adjuncts of various types
|
117
|
-
|
118
|
-
# Gerunds
|
119
|
-
ger nom → 0 # invalid case for a gerundive
|
120
|
-
|
121
|
-
ger acc → obl # as argument of a preposition
|
122
|
-
|
123
|
-
ger gen → atr # in the _tempus dicendi_ type
|
124
|
-
ger gen → narg # in the _facultas dicendi_ type
|
125
|
-
|
126
|
-
ger abl → obl # as argument of a preposition
|
127
|
-
ger abl → abl # in circumstantial adjuncts of various types
|
128
|
-
|
129
|
-
# Reflexive pronouns
|
130
|
-
persrefl nom → 0
|
131
|
-
|
132
|
-
persrefl acc → sub
|
133
|
-
persrefl acc → obj
|
134
|
-
persrefl acc → obl
|
135
|
-
|
136
|
-
persrefl dat → obl
|
137
|
-
persrefl dat → adv
|
138
|
-
persrefl dat → ag
|
139
|
-
|
140
|
-
persrefl abl → obl
|
141
|
-
persrefl abl → sub
|
142
|
-
|
143
|
-
persrefl "se" → acc
|
144
|
-
persrefl "se" → abl
|
145
|
-
|
146
|
-
persrefl "sese" → acc
|
147
|
-
persrefl "sese" → abl
|
148
|
-
|
149
|
-
persrefl "sibi" → dat
|
150
|
-
|
151
|
-
# Personal pronouns
|
152
|
-
perspron nom → sub
|
153
|
-
|
154
|
-
perspron acc → sub
|
155
|
-
perspron acc → obj
|
156
|
-
perspron acc → obl
|
157
|
-
|
158
|
-
perspron dat → obl
|
159
|
-
perspron dat → adv
|
160
|
-
perspron dat → ag
|
161
|
-
|
162
|
-
perspron abl → obl
|
163
|
-
perspron abl → sub
|
164
|
-
|
165
|
-
# The dependent of the complementisers _ut_ and _ne_ should be a PRED or an AUX
|
166
|
-
subj "ut" → dependents(pred) # the standard case, a predicate heading a clause
|
167
|
-
subj "ut" → dependents(aux) # some particle-like material dependent on the complementiser
|
168
|
-
|
169
|
-
subj "ne" → dependents(pred)
|
170
|
-
subj "ne" → dependents(aux)
|
171
|
-
|
172
|
-
# Particles and adverbs
|
173
|
-
"iam" → adverb adv
|
174
|
-
"iam" → adverb aux # possibly
|
data/examples/relation-as-disambiguator.rb
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Does the dependency relation suffice to disambiguate ambiguous morphology?
|
4
|
-
#
|
5
|
-
require 'colorize'
|
6
|
-
require 'proiel'
|
7
|
-
|
8
|
-
if ARGV.length < 1
|
9
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
10
|
-
|
11
|
-
exit 1
|
12
|
-
end
|
13
|
-
|
14
|
-
tb = PROIEL::Treebank.new
|
15
|
-
tb.load_from_xml(ARGV)
|
16
|
-
|
17
|
-
# Harvest morphology
|
18
|
-
form_hash = {}
|
19
|
-
|
20
|
-
tb.sources.reject { |source| source.language != language_tag }.each do |source|
|
21
|
-
source.tokens.each do |token|
|
22
|
-
next unless token.form and token.pos and token.morphology and token.relation
|
23
|
-
|
24
|
-
# TODO: problem with using token.form is that sentence-initial words are sometimes capitalised
|
25
|
-
relation_hash = (form_hash[token.form] ||= {})
|
26
|
-
pos_hash = (relation_hash[token.relation] ||= {})
|
27
|
-
morphology_hash = (pos_hash[token.pos] ||= {})
|
28
|
-
morphology_hash[token.morphology] ||= 0
|
29
|
-
morphology_hash[token.morphology] += 1
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# Calculate by unique forms first
|
34
|
-
unique_pos = 0
|
35
|
-
relation_predicts_pos = 0
|
36
|
-
relation_does_not_predict_pos = 0
|
37
|
-
|
38
|
-
unique_morphology = 0
|
39
|
-
relation_predicts_morphology = 0
|
40
|
-
relation_does_not_predict_morphology = 0
|
41
|
-
|
42
|
-
form_hash.each do |form, h|
|
43
|
-
number_of_poses = 0
|
44
|
-
number_of_morphologies = 0
|
45
|
-
|
46
|
-
h.each do |_, i|
|
47
|
-
i.each do |_, j|
|
48
|
-
number_of_poses += 1
|
49
|
-
j.each do |_, k|
|
50
|
-
number_of_morphologies += 1
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
if number_of_poses == 1
|
56
|
-
unique_pos += 1
|
57
|
-
elsif h.all? { |_, i| i.keys.count == 1 }
|
58
|
-
relation_predicts_pos += 1
|
59
|
-
else
|
60
|
-
relation_does_not_predict_pos += 1
|
61
|
-
end
|
62
|
-
|
63
|
-
if number_of_morphologies == 1
|
64
|
-
unique_morphology += 1
|
65
|
-
elsif h.all? { |_, i| i.all? { |_, j| j.keys.count == 1 } }
|
66
|
-
relation_predicts_morphology += 1
|
67
|
-
else
|
68
|
-
relation_does_not_predict_morphology += 1
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
puts "By unique forms (types)"
|
73
|
-
puts "======================="
|
74
|
-
puts "Forms with a unique POS: #{unique_pos}"
|
75
|
-
puts "Forms whose relation predicts its POS: #{relation_predicts_pos}"
|
76
|
-
puts "Forms whose relation does not predict its POS: #{relation_does_not_predict_pos}"
|
77
|
-
|
78
|
-
puts "Forms with a unique morphology: #{unique_morphology}"
|
79
|
-
puts "Forms whose relation predicts its morphology: #{relation_predicts_morphology}"
|
80
|
-
puts "Forms whose relation does not predict its morphology: #{relation_does_not_predict_morphology}"
|
81
|
-
|
82
|
-
# Calculate by actual number of occurrences
|
83
|
-
unique_pos = 0
|
84
|
-
relation_predicts_pos = 0
|
85
|
-
relation_does_not_predict_pos = 0
|
86
|
-
|
87
|
-
unique_morphology = 0
|
88
|
-
relation_predicts_morphology = 0
|
89
|
-
relation_does_not_predict_morphology = 0
|
90
|
-
|
91
|
-
form_hash.each do |form, h|
|
92
|
-
n = 0
|
93
|
-
number_of_poses = 0
|
94
|
-
number_of_morphologies = 0
|
95
|
-
|
96
|
-
h.each do |_, i|
|
97
|
-
i.each do |_, j|
|
98
|
-
number_of_poses += 1
|
99
|
-
j.each do |_, k|
|
100
|
-
number_of_morphologies += 1
|
101
|
-
n += k
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
if number_of_poses == 1
|
107
|
-
unique_pos += n
|
108
|
-
elsif h.all? { |_, i| i.keys.count == 1 }
|
109
|
-
relation_predicts_pos += n
|
110
|
-
else
|
111
|
-
relation_does_not_predict_pos += n
|
112
|
-
end
|
113
|
-
|
114
|
-
if number_of_morphologies == 1
|
115
|
-
unique_morphology += n
|
116
|
-
elsif h.all? { |_, i| i.all? { |_, j| j.keys.count == 1 } }
|
117
|
-
relation_predicts_morphology += n
|
118
|
-
else
|
119
|
-
relation_does_not_predict_morphology += n
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
puts
|
124
|
-
puts "By occurrences of forms (tokens)"
|
125
|
-
puts "================================"
|
126
|
-
puts "Forms with a unique POS: #{unique_pos}"
|
127
|
-
puts "Forms whose relation predicts its POS: #{relation_predicts_pos}"
|
128
|
-
puts "Forms whose relation does not predict its POS: #{relation_does_not_predict_pos}"
|
129
|
-
|
130
|
-
puts "Forms with a unique morphology: #{unique_morphology}"
|
131
|
-
puts "Forms whose relation predicts its morphology: #{relation_predicts_morphology}"
|
132
|
-
puts "Forms whose relation does not predict its morphology: #{relation_does_not_predict_morphology}"
|
133
|
-
|
134
|
-
exit 0
|
data/examples/word-occurrences.rb
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Word form occurrence extraction
|
4
|
-
#
|
5
|
-
require 'colorize'
|
6
|
-
require 'proiel'
|
7
|
-
|
8
|
-
if ARGV.length < 1
|
9
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
10
|
-
|
11
|
-
exit 1
|
12
|
-
end
|
13
|
-
|
14
|
-
tb = PROIEL::Treebank.new
|
15
|
-
tb.load_from_xml(ARGV)
|
16
|
-
|
17
|
-
form_index = {}
|
18
|
-
|
19
|
-
tb.sources.each do |source|
|
20
|
-
source.tokens.each do |token|
|
21
|
-
unless token.form.nil?
|
22
|
-
form_index[token.form] ||= []
|
23
|
-
form_index[token.form] << [source.id, token.id].join(':')
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
form_index.sort_by(&:first).each do |form, ids|
|
29
|
-
puts "#{form}: #{ids.join(', ')}"
|
30
|
-
end
|