proiel-cli 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/proiel/cli/converters/proielxml.rb +1 -1
- data/lib/proiel/cli/version.rb +1 -1
- metadata +2 -7
- data/examples/decision-tree.rb +0 -41
- data/examples/dep-pos-cooccurrences.rb +0 -84
- data/examples/lint-rules.rb +0 -174
- data/examples/relation-as-disambiguator.rb +0 -134
- data/examples/word-occurrences.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bfb4db333ae791d4171490c7b3d316364b205729
|
4
|
+
data.tar.gz: fd001265bdcc8e75e49fc1e2a188f7c63471bb62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29cb26e28f22486db097b982d7e2cd4783f2b38d8d6c13fb06309fb25377791cd8248c88627b7d5b72c3adfe0e958c9696b2508fcb975e9a8fdf516a9825963b
|
7
|
+
data.tar.gz: b565292467456b10415039f0464bdfafcae5fecf55747dd3b9ec2473313ba08789d49431ecb06d649596bfd98031640481efccd9b5be58b6b565d13dcc3a1161
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# PROIEL command-line interface
|
2
2
|
|
3
|
-
This is a command-line interface for
|
3
|
+
This is a command-line interface for manipulating PROIEL treebanks.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
Install as
|
7
|
+
This library requires Ruby >= 2.1. Install as
|
8
8
|
|
9
9
|
```shell
|
10
10
|
gem install proiel-cli
|
@@ -88,7 +88,7 @@ module PROIEL
|
|
88
88
|
unless token.slashes.empty? or options['remove-syntax'] # this extra test avoids <token></token> style XML
|
89
89
|
builder.token(attrs) do
|
90
90
|
token.slashes.each do |relation, target_id|
|
91
|
-
builder.slash("target-id"
|
91
|
+
builder.slash(:"target-id" => target_id, relation: relation)
|
92
92
|
end
|
93
93
|
end
|
94
94
|
else
|
data/lib/proiel/cli/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proiel-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marius L. Jøhndal
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: builder
|
@@ -197,11 +197,6 @@ files:
|
|
197
197
|
- contrib/proiel-maltparser-parse
|
198
198
|
- contrib/proiel-maltparser-train
|
199
199
|
- contrib/proiel-tnt-train
|
200
|
-
- examples/decision-tree.rb
|
201
|
-
- examples/dep-pos-cooccurrences.rb
|
202
|
-
- examples/lint-rules.rb
|
203
|
-
- examples/relation-as-disambiguator.rb
|
204
|
-
- examples/word-occurrences.rb
|
205
200
|
- lib/proiel/cli.rb
|
206
201
|
- lib/proiel/cli/commands.rb
|
207
202
|
- lib/proiel/cli/commands/convert.rb
|
data/examples/decision-tree.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Train a decision tree on the (head_relation, head_pos, head_lemma,
|
4
|
-
# child_relation, child_pos, child_lemma) and then predict the child_relation
|
5
|
-
# of an unknown child.
|
6
|
-
#
|
7
|
-
require 'colorize'
|
8
|
-
require 'decisiontree'
|
9
|
-
require 'proiel'
|
10
|
-
|
11
|
-
if ARGV.length < 1
|
12
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
13
|
-
exit 1
|
14
|
-
end
|
15
|
-
|
16
|
-
tb = PROIEL::Treebank.new
|
17
|
-
tb.load_from_xml(ARGV)
|
18
|
-
|
19
|
-
tokens = {}
|
20
|
-
|
21
|
-
tb.sources.each do |source|
|
22
|
-
source.tokens.each do |token|
|
23
|
-
tokens[token.id.to_i] = [token.relation, token.part_of_speech, token.lemma, token.head_id]
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
training_data = tokens.map do |_, (child_relation, child_pos, child_lemma, head_id)|
|
28
|
-
if head_id
|
29
|
-
head = tokens[head_id.to_i]
|
30
|
-
head_relation, head_pos, head_lemma, _ = *head
|
31
|
-
|
32
|
-
[head_pos || '', head_lemma || '', head_relation, child_pos || '', child_lemma || '', child_relation]
|
33
|
-
end
|
34
|
-
end.compact
|
35
|
-
|
36
|
-
attributes = %w(head_pos head_lemma head_relation child_pos child_lemma)
|
37
|
-
dr = DecisionTree::ID3Tree.new(attributes, training_data, 'pred', :discrete)
|
38
|
-
dr.train
|
39
|
-
dr.save_to_file("dr.marshal")
|
40
|
-
|
41
|
-
p dr.predict(["Ne", "Gallia", "sub", "Px", "omnis"])
|
@@ -1,84 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'proiel'
|
3
|
-
require 'colorize'
|
4
|
-
require 'terminal-table'
|
5
|
-
|
6
|
-
if ARGV.length < 1
|
7
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
8
|
-
exit 1
|
9
|
-
end
|
10
|
-
|
11
|
-
tb = PROIEL::Treebank.new
|
12
|
-
tb.load_from_xml(ARGV)
|
13
|
-
|
14
|
-
# Present by POS
|
15
|
-
relations = tb.annotation.relation_tags.keys
|
16
|
-
|
17
|
-
c = {}
|
18
|
-
tb.sources.each do |s|
|
19
|
-
s.tokens.each do |t|
|
20
|
-
next if t.pos.nil? or t.relation.nil?
|
21
|
-
|
22
|
-
c[t.pos] ||= {}
|
23
|
-
c[t.pos][t.relation] ||= 0
|
24
|
-
c[t.pos][t.relation] += 1
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
rows = []
|
29
|
-
c.sort_by(&:first).each do |pos, d|
|
30
|
-
total = d.inject(0) { |a, (k, v)| a + v }
|
31
|
-
|
32
|
-
rows << [pos] + relations.map do |r|
|
33
|
-
n = d[r ? r.to_s : nil]
|
34
|
-
|
35
|
-
if n and n < total * 0.001
|
36
|
-
n.to_s.red
|
37
|
-
elsif n and n > total * 0.999
|
38
|
-
n.to_s.green
|
39
|
-
else
|
40
|
-
n
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
table = Terminal::Table.new headings: ['Part of speech'] + relations, rows: rows
|
46
|
-
puts table
|
47
|
-
puts "(red = relation occurs for less than 0.1% of tokens with this POS; green = relation occurs for more than 99.9% of tokens with this POS)"
|
48
|
-
puts
|
49
|
-
|
50
|
-
# Present by relation
|
51
|
-
poses = tb.annotation.part_of_speech_tags.keys
|
52
|
-
|
53
|
-
c = {}
|
54
|
-
|
55
|
-
tb.sources.each do |s|
|
56
|
-
s.tokens.each do |t|
|
57
|
-
next if t.pos.nil? or t.relation.nil?
|
58
|
-
|
59
|
-
c[t.relation] ||= {}
|
60
|
-
c[t.relation][t.pos] ||= 0
|
61
|
-
c[t.relation][t.pos] += 1
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
rows = []
|
66
|
-
c.sort_by(&:first).each do |relation, d|
|
67
|
-
total = d.inject(0) { |a, (k, v)| a + v }
|
68
|
-
|
69
|
-
rows << [relation] + poses.map do |r|
|
70
|
-
n = d[r ? r.to_s : nil]
|
71
|
-
|
72
|
-
if n and n < total * 0.001
|
73
|
-
n.to_s.red
|
74
|
-
elsif n and n > total * 0.999
|
75
|
-
n.to_s.green
|
76
|
-
else
|
77
|
-
n
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
table = Terminal::Table.new headings: ['Relation'] + poses, rows: rows
|
83
|
-
puts table
|
84
|
-
puts "(red = POS occurs for less than 0.1% of tokens with this relation; green = POS occurs for more than 99.9% of tokens with this relation)"
|
data/examples/lint-rules.rb
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Very simple testing of implicational feature rules. Example rules only
|
4
|
-
# apply to Latin.
|
5
|
-
#
|
6
|
-
require 'colorize'
|
7
|
-
require 'proiel'
|
8
|
-
|
9
|
-
VIOLATIONS = {}
|
10
|
-
|
11
|
-
def report_violation(token, message)
|
12
|
-
VIOLATIONS[message] ||= []
|
13
|
-
VIOLATIONS[message] << token
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_token(token, rules, dependent_rules)
|
17
|
-
rules.each do |match_features, test_alternatives|
|
18
|
-
f = token.features + ["\"#{token.form}\""]
|
19
|
-
|
20
|
-
if (match_features - f).empty?
|
21
|
-
unless test_alternatives.any? { |test_alternative| token.features.include?(test_alternative) }
|
22
|
-
report_violation(token, "#{match_features.join(' ')}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
dependent_rules.each do |match_features, test_alternatives|
|
28
|
-
f = token.features + ["\"#{token.form}\""]
|
29
|
-
|
30
|
-
if (match_features - f).empty?
|
31
|
-
t = token.children.all? do |dependent|
|
32
|
-
test_alternatives.any? { |test_alternative| dependent.features.include?(test_alternative) }
|
33
|
-
end
|
34
|
-
|
35
|
-
unless t
|
36
|
-
report_violation(token, "#{match_features.join(' ')} → dependents()")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def load_rules
|
43
|
-
rules = {}
|
44
|
-
dependent_rules = {}
|
45
|
-
|
46
|
-
DATA.each do |rule|
|
47
|
-
rule.chomp!
|
48
|
-
rule.sub!(/\s*#.*$/, '')
|
49
|
-
|
50
|
-
next if rule.empty?
|
51
|
-
|
52
|
-
match_features, test = rule.split(/\s*→\s*/)
|
53
|
-
match_features = match_features.split(/\s+/)
|
54
|
-
|
55
|
-
if test[/\s*dependents\(([^)]*)\)\s*/]
|
56
|
-
dependent_rules[match_features] ||= []
|
57
|
-
dependent_rules[match_features] << $1
|
58
|
-
test.sub!(/\s*dependents\([^)]*\)\s*/, '')
|
59
|
-
end
|
60
|
-
|
61
|
-
if test != ''
|
62
|
-
rules[match_features] ||= []
|
63
|
-
rules[match_features] << test
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
[rules, dependent_rules]
|
68
|
-
end
|
69
|
-
|
70
|
-
if ARGV.length < 1
|
71
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
72
|
-
exit 1
|
73
|
-
end
|
74
|
-
|
75
|
-
tb = PROIEL::Treebank.new
|
76
|
-
tb.load_from_xml(ARGV)
|
77
|
-
|
78
|
-
rules, dependent_rules = load_rules
|
79
|
-
|
80
|
-
tb.sources.each do |source|
|
81
|
-
source.sentences.each do |sentence|
|
82
|
-
if sentence.status == 'reviewed'
|
83
|
-
sentence.tokens.each do |token|
|
84
|
-
test_token(token, rules, dependent_rules)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
base_url = 'http://foni.uio.no:3000'
|
91
|
-
|
92
|
-
puts "<h1>PROIEL lint report</h1>"
|
93
|
-
|
94
|
-
VIOLATIONS.each do |rule, tokens|
|
95
|
-
puts "<h2>#{rule}</h2><ul>"
|
96
|
-
tokens.each do |token|
|
97
|
-
puts "<li>Token <a href='#{base_url}/tokens/#{token.id}'>#{token.id}</a> in sentence <a href='#{base_url}/sentences/#{token.sentence.id}'>#{token.sentence.id}</a></li>"
|
98
|
-
end
|
99
|
-
puts "</ul>"
|
100
|
-
end
|
101
|
-
|
102
|
-
__END__
|
103
|
-
|
104
|
-
# Gerundives
|
105
|
-
gdv nom → xobj # modal gerundive heading a main clause
|
106
|
-
|
107
|
-
gdv acc → comp # modal gerundive heading an AcI, or in the _curo faciendum_ type
|
108
|
-
gdv acc → xobj # modal gerundive heading an AcI with an overt auxiliary
|
109
|
-
gdv acc → obl # as argument of a preposition
|
110
|
-
gdv acc → xadv # in the _do librum legendum_ type
|
111
|
-
|
112
|
-
gdv gen → atr # in the _tempus dicendi_ type
|
113
|
-
gdv gen → narg # in the _facultas dicendi_ type
|
114
|
-
|
115
|
-
gdv abl → obl # as argument of a preposition
|
116
|
-
gdv abl → abl # in circumstantial adjuncts of various types
|
117
|
-
|
118
|
-
# Gerunds
|
119
|
-
ger nom → 0 # invalid case for a gerundive
|
120
|
-
|
121
|
-
ger acc → obl # as argument of a preposition
|
122
|
-
|
123
|
-
ger gen → atr # in the _tempus dicendi_ type
|
124
|
-
ger gen → narg # in the _facultas dicendi_ type
|
125
|
-
|
126
|
-
ger abl → obl # as argument of a preposition
|
127
|
-
ger abl → abl # in circumstantial adjuncts of various types
|
128
|
-
|
129
|
-
# Reflexive pronouns
|
130
|
-
persrefl nom → 0
|
131
|
-
|
132
|
-
persrefl acc → sub
|
133
|
-
persrefl acc → obj
|
134
|
-
persrefl acc → obl
|
135
|
-
|
136
|
-
persrefl dat → obl
|
137
|
-
persrefl dat → adv
|
138
|
-
persrefl dat → ag
|
139
|
-
|
140
|
-
persrefl abl → obl
|
141
|
-
persrefl abl → sub
|
142
|
-
|
143
|
-
persrefl "se" → acc
|
144
|
-
persrefl "se" → abl
|
145
|
-
|
146
|
-
persrefl "sese" → acc
|
147
|
-
persrefl "sese" → abl
|
148
|
-
|
149
|
-
persrefl "sibi" → dat
|
150
|
-
|
151
|
-
# Personal pronouns
|
152
|
-
perspron nom → sub
|
153
|
-
|
154
|
-
perspron acc → sub
|
155
|
-
perspron acc → obj
|
156
|
-
perspron acc → obl
|
157
|
-
|
158
|
-
perspron dat → obl
|
159
|
-
perspron dat → adv
|
160
|
-
perspron dat → ag
|
161
|
-
|
162
|
-
perspron abl → obl
|
163
|
-
perspron abl → sub
|
164
|
-
|
165
|
-
# The dependent of the complementisers _ut_ and _ne_ should be a PRED or an AUX
|
166
|
-
subj "ut" → dependents(pred) # the standard case, a predicate heading a clause
|
167
|
-
subj "ut" → dependents(aux) # some particle-like material dependent on the complementiser
|
168
|
-
|
169
|
-
subj "ne" → dependents(pred)
|
170
|
-
subj "ne" → dependents(aux)
|
171
|
-
|
172
|
-
# Particles and adverbs
|
173
|
-
"iam" → adverb adv
|
174
|
-
"iam" → adverb aux # possibly
|
@@ -1,134 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Does the dependency relation suffice to disambiguate ambiguous morphology?
|
4
|
-
#
|
5
|
-
require 'colorize'
|
6
|
-
require 'proiel'
|
7
|
-
|
8
|
-
if ARGV.length < 1
|
9
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
10
|
-
|
11
|
-
exit 1
|
12
|
-
end
|
13
|
-
|
14
|
-
tb = PROIEL::Treebank.new
|
15
|
-
tb.load_from_xml(ARGV)
|
16
|
-
|
17
|
-
# Harvest morphology
|
18
|
-
form_hash = {}
|
19
|
-
|
20
|
-
tb.sources.reject { |source| source.language != language_tag }.each do |source|
|
21
|
-
source.tokens.each do |token|
|
22
|
-
next unless token.form and token.pos and token.morphology and token.relation
|
23
|
-
|
24
|
-
# TODO: problem with using token.form is that sentence-initial words are sometimes capitalised
|
25
|
-
relation_hash = (form_hash[token.form] ||= {})
|
26
|
-
pos_hash = (relation_hash[token.relation] ||= {})
|
27
|
-
morphology_hash = (pos_hash[token.pos] ||= {})
|
28
|
-
morphology_hash[token.morphology] ||= 0
|
29
|
-
morphology_hash[token.morphology] += 1
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# Calculate by unique forms first
|
34
|
-
unique_pos = 0
|
35
|
-
relation_predicts_pos = 0
|
36
|
-
relation_does_not_predict_pos = 0
|
37
|
-
|
38
|
-
unique_morphology = 0
|
39
|
-
relation_predicts_morphology = 0
|
40
|
-
relation_does_not_predict_morphology = 0
|
41
|
-
|
42
|
-
form_hash.each do |form, h|
|
43
|
-
number_of_poses = 0
|
44
|
-
number_of_morphologies = 0
|
45
|
-
|
46
|
-
h.each do |_, i|
|
47
|
-
i.each do |_, j|
|
48
|
-
number_of_poses += 1
|
49
|
-
j.each do |_, k|
|
50
|
-
number_of_morphologies += 1
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
if number_of_poses == 1
|
56
|
-
unique_pos += 1
|
57
|
-
elsif h.all? { |_, i| i.keys.count == 1 }
|
58
|
-
relation_predicts_pos += 1
|
59
|
-
else
|
60
|
-
relation_does_not_predict_pos += 1
|
61
|
-
end
|
62
|
-
|
63
|
-
if number_of_morphologies == 1
|
64
|
-
unique_morphology += 1
|
65
|
-
elsif h.all? { |_, i| i.all? { |_, j| j.keys.count == 1 } }
|
66
|
-
relation_predicts_morphology += 1
|
67
|
-
else
|
68
|
-
relation_does_not_predict_morphology += 1
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
puts "By unique forms (types)"
|
73
|
-
puts "======================="
|
74
|
-
puts "Forms with a unique POS: #{unique_pos}"
|
75
|
-
puts "Forms whose relation predicts its POS: #{relation_predicts_pos}"
|
76
|
-
puts "Forms whose relation does not predict its POS: #{relation_does_not_predict_pos}"
|
77
|
-
|
78
|
-
puts "Forms with a unique morphology: #{unique_morphology}"
|
79
|
-
puts "Forms whose relation predicts its morphology: #{relation_predicts_morphology}"
|
80
|
-
puts "Forms whose relation does not predict its morphology: #{relation_does_not_predict_morphology}"
|
81
|
-
|
82
|
-
# Calculate by actual number of occurrences
|
83
|
-
unique_pos = 0
|
84
|
-
relation_predicts_pos = 0
|
85
|
-
relation_does_not_predict_pos = 0
|
86
|
-
|
87
|
-
unique_morphology = 0
|
88
|
-
relation_predicts_morphology = 0
|
89
|
-
relation_does_not_predict_morphology = 0
|
90
|
-
|
91
|
-
form_hash.each do |form, h|
|
92
|
-
n = 0
|
93
|
-
number_of_poses = 0
|
94
|
-
number_of_morphologies = 0
|
95
|
-
|
96
|
-
h.each do |_, i|
|
97
|
-
i.each do |_, j|
|
98
|
-
number_of_poses += 1
|
99
|
-
j.each do |_, k|
|
100
|
-
number_of_morphologies += 1
|
101
|
-
n += k
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
if number_of_poses == 1
|
107
|
-
unique_pos += n
|
108
|
-
elsif h.all? { |_, i| i.keys.count == 1 }
|
109
|
-
relation_predicts_pos += n
|
110
|
-
else
|
111
|
-
relation_does_not_predict_pos += n
|
112
|
-
end
|
113
|
-
|
114
|
-
if number_of_morphologies == 1
|
115
|
-
unique_morphology += n
|
116
|
-
elsif h.all? { |_, i| i.all? { |_, j| j.keys.count == 1 } }
|
117
|
-
relation_predicts_morphology += n
|
118
|
-
else
|
119
|
-
relation_does_not_predict_morphology += n
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
puts
|
124
|
-
puts "By occurrences of forms (tokens)"
|
125
|
-
puts "================================"
|
126
|
-
puts "Forms with a unique POS: #{unique_pos}"
|
127
|
-
puts "Forms whose relation predicts its POS: #{relation_predicts_pos}"
|
128
|
-
puts "Forms whose relation does not predict its POS: #{relation_does_not_predict_pos}"
|
129
|
-
|
130
|
-
puts "Forms with a unique morphology: #{unique_morphology}"
|
131
|
-
puts "Forms whose relation predicts its morphology: #{relation_predicts_morphology}"
|
132
|
-
puts "Forms whose relation does not predict its morphology: #{relation_does_not_predict_morphology}"
|
133
|
-
|
134
|
-
exit 0
|
@@ -1,30 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Word form occurrence extraction
|
4
|
-
#
|
5
|
-
require 'colorize'
|
6
|
-
require 'proiel'
|
7
|
-
|
8
|
-
if ARGV.length < 1
|
9
|
-
STDERR.puts "Usage: #{$0} treebank-files(s)"
|
10
|
-
|
11
|
-
exit 1
|
12
|
-
end
|
13
|
-
|
14
|
-
tb = PROIEL::Treebank.new
|
15
|
-
tb.load_from_xml(ARGV)
|
16
|
-
|
17
|
-
form_index = {}
|
18
|
-
|
19
|
-
tb.sources.each do |source|
|
20
|
-
source.tokens.each do |token|
|
21
|
-
unless token.form.nil?
|
22
|
-
form_index[token.form] ||= []
|
23
|
-
form_index[token.form] << [source.id, token.id].join(':')
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
form_index.sort_by(&:first).each do |form, ids|
|
29
|
-
puts "#{form}: #{ids.join(', ')}"
|
30
|
-
end
|