proiel-cli 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +11 -3
- data/bin/proiel +1 -1
- data/lib/proiel/cli/commands/build.rb +91 -0
- data/lib/proiel/cli/commands/convert.rb +7 -2
- data/lib/proiel/cli/commands/dictionary.rb +46 -0
- data/lib/proiel/cli/commands/info.rb +1 -1
- data/lib/proiel/cli/commands/shell.rb +34 -0
- data/lib/proiel/cli/commands/tokenize.rb +2 -2
- data/lib/proiel/cli/commands/validate.rb +1 -1
- data/lib/proiel/cli/commands/visualize.rb +14 -11
- data/lib/proiel/cli/converters/conll-u/morphology.rb +162 -72
- data/lib/proiel/cli/converters/conll-u/syntax.rb +108 -62
- data/lib/proiel/cli/converters/conll-u.rb +648 -548
- data/lib/proiel/cli/converters/conll-x.rb +67 -52
- data/lib/proiel/cli/converters/lexc.rb +21 -23
- data/lib/proiel/cli/converters/proielxml.rb +173 -132
- data/lib/proiel/cli/converters/text.rb +69 -71
- data/lib/proiel/cli/converters/tiger.rb +110 -114
- data/lib/proiel/cli/converters/tiger2.rb +139 -141
- data/lib/proiel/cli/converters/tnt.rb +19 -15
- data/lib/proiel/cli/version.rb +1 -1
- data/lib/proiel/cli.rb +26 -1
- metadata +43 -58
- data/bin/setup +0 -8
- data/contrib/proiel-tnt-train +0 -15
- data/lib/proiel/cli/commands.rb +0 -28
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 7f03b2148dc4a7dff5176d490e8850507b4984df13dafd56b04dd81763708df7
|
|
4
|
+
data.tar.gz: 8e41729b4ab79c7f48c8b34f77ac65a58188e950879c473f41dcc1c74af17d4d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 838988757f5ee2360496297516047fe2715da30c8327aa411b6d3f395284d02a33123c263c238874e596aedcb53bbe9aeb79d63363fd712cb3480357c54bf87a
|
|
7
|
+
data.tar.gz: 1f629b9b036776f23a0c80851ea80f69082e8f2df8675ed268e0afdc86ac2e94264fecbaedd686a087475cc29e2abb7d99b090a4179c48c62516c402ea5179b5
|
data/README.md
CHANGED
|
@@ -11,7 +11,7 @@ This is a command-line interface for manipulating PROIEL treebanks.
|
|
|
11
11
|
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
14
|
-
This library requires Ruby >= 2.
|
|
14
|
+
This library requires Ruby >= 2.4. Install as
|
|
15
15
|
|
|
16
16
|
```shell
|
|
17
17
|
gem install proiel-cli
|
|
@@ -42,8 +42,16 @@ Bug reports and pull requests are welcome on [GitHub](https://github.com/proiel/
|
|
|
42
42
|
|
|
43
43
|
## Development
|
|
44
44
|
|
|
45
|
-
To contribute to development, check out the git repository from [GitHub](https://github.com/proiel/proiel-cli) and run `
|
|
45
|
+
To contribute to development, check out the git repository from [GitHub](https://github.com/proiel/proiel-cli) and run `bundle install` to install all development dependencies. Then run `rake` to run the tests.
|
|
46
46
|
|
|
47
47
|
To install a development version of this gem, run `bundle exec rake install`.
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
### Releasing a new version
|
|
50
|
+
|
|
51
|
+
To release a new version:
|
|
52
|
+
|
|
53
|
+
1. Update the version number in `lib/proiel/cli/version.rb`.
|
|
54
|
+
2. Run `bundle exec rake release`. This will:
|
|
55
|
+
* Create a git tag for the version.
|
|
56
|
+
* Push git commits and tags to the remote repository.
|
|
57
|
+
* Push the `.gem` file to [rubygems.org](https://rubygems.org).
|
data/bin/proiel
CHANGED
|
data/lib/proiel/cli/commands/build.rb
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
module PROIEL
|
|
2
|
+
module Commands
|
|
3
|
+
class Build < Command
|
|
4
|
+
class << self
|
|
5
|
+
def init_with_program(prog)
|
|
6
|
+
prog.command(:build) do |c|
|
|
7
|
+
c.syntax 'build resource'
|
|
8
|
+
c.description 'Build a derived resource'
|
|
9
|
+
|
|
10
|
+
c.command(:dictionary) do |f|
|
|
11
|
+
f.syntax 'output_filename [filename(s)]'
|
|
12
|
+
f.description 'Build a dictionary from treebank data'
|
|
13
|
+
f.action { |args, options| process_dictionary(args, options) }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
c.command(:dictionaries) do |f|
|
|
17
|
+
f.syntax '[filename(s)]'
|
|
18
|
+
f.description 'Build multiple dictionaries (one per language) from treebank data'
|
|
19
|
+
f.action { |args, options| process_dictionaries(args, options) }
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
c.action do |_, _|
|
|
23
|
+
STDERR.puts 'Missing or invalid format. Use --help for more information.'
|
|
24
|
+
exit 1
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def process_dictionary(args, options)
|
|
30
|
+
if args.empty?
|
|
31
|
+
STDERR.puts 'Missing output filename. Use --help for more information.'
|
|
32
|
+
exit 1
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
output_filename, *input_filenames = args
|
|
36
|
+
|
|
37
|
+
tb = PROIEL::Treebank.new
|
|
38
|
+
dict = PROIEL::Dictionary::Builder.new
|
|
39
|
+
|
|
40
|
+
if input_filenames.empty?
|
|
41
|
+
STDERR.puts 'Reading from standard input...'.green if options['verbose']
|
|
42
|
+
|
|
43
|
+
tb.load_from_xml(STDIN)
|
|
44
|
+
tb.sources.each { |source| dict.add_source!(source) }
|
|
45
|
+
else
|
|
46
|
+
input_filenames.each do |filename|
|
|
47
|
+
STDERR.puts "Reading #{filename}...".green if options['verbose']
|
|
48
|
+
|
|
49
|
+
tb.load_from_xml(filename)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
tb.sources.each { |source| dict.add_source!(source) }
|
|
54
|
+
|
|
55
|
+
File.open(output_filename, 'w') do |f|
|
|
56
|
+
dict.to_xml(f)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def process_dictionaries(args, options)
|
|
61
|
+
dicts = {}
|
|
62
|
+
|
|
63
|
+
if args.empty?
|
|
64
|
+
STDERR.puts 'Reading from standard input...'.green if options['verbose']
|
|
65
|
+
|
|
66
|
+
tb = PROIEL::Treebank.new
|
|
67
|
+
tb.load_from_xml(STDIN)
|
|
68
|
+
else
|
|
69
|
+
tb = PROIEL::Treebank.new
|
|
70
|
+
|
|
71
|
+
args.each do |filename|
|
|
72
|
+
STDERR.puts "Reading #{filename}...".green if options['verbose']
|
|
73
|
+
tb.load_from_xml(filename)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
tb.sources.each do |source|
|
|
78
|
+
dicts[source.language] ||= PROIEL::Dictionary::Builder.new
|
|
79
|
+
dicts[source.language].add_source!(source)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
dicts.each do |language, dict|
|
|
83
|
+
File.open("#{language}.xml", 'w') do |f|
|
|
84
|
+
dict.to_xml(f)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
data/lib/proiel/cli/commands/convert.rb
CHANGED
|
@@ -12,6 +12,8 @@ module PROIEL
|
|
|
12
12
|
f.description 'Convert to PROIEL XML format'
|
|
13
13
|
f.option 'remove-not-annotated', '--remove-not-annotated', 'Remove sentences that have not been annotated'
|
|
14
14
|
f.option 'remove-not-reviewed', '--remove-not-reviewed', 'Remove sentences that have not been reviewed'
|
|
15
|
+
f.option 'remove-annotated', '--remove-annotated', 'Remove sentences that have been annotated'
|
|
16
|
+
f.option 'remove-reviewed', '--remove-reviewed', 'Remove sentences that have been reviewed'
|
|
15
17
|
f.option 'remove-morphology', '--remove-morphology', 'Remove morphological annotation (part of speech, morphology and lemma)'
|
|
16
18
|
f.option 'remove-syntax', '--remove-syntax', 'Remove syntactic annotation (relation, head ID and slashes)'
|
|
17
19
|
f.option 'remove-information-structure', '--remove-information-structure', 'Remove informtion structure annotation (antecedent ID, information status and contrast group)'
|
|
@@ -20,13 +22,16 @@ module PROIEL
|
|
|
20
22
|
f.option 'remove-annotator', '--remove-annotator', 'Remove annotator information'
|
|
21
23
|
f.option 'remove-reviewer', '--remove-reviewer', 'Remove reviewer information'
|
|
22
24
|
f.option 'remove-empty-divs', '--remove-empty-divs', 'Remove div elements that do not contain any sentences'
|
|
25
|
+
f.option 'infer-alignments', '--infer-alignments', 'Add inferred alignments when possible'
|
|
26
|
+
f.option 'remove-unaligned-sources', '--remove-unaligned-sources', 'Remove sources that are not aligned'
|
|
23
27
|
f.action { |args, options| process(args, options, PROIEL::Converter::PROIELXML) }
|
|
24
28
|
end
|
|
25
29
|
|
|
26
30
|
c.command(:tnt) do |f|
|
|
27
31
|
f.syntax '[options] filename(s)'
|
|
28
32
|
f.description 'Convert to TNT/hunpos format'
|
|
29
|
-
f.option 'morphology', '-m', '--morphology', 'Include morphological tags'
|
|
33
|
+
f.option 'morphology', '-m', '--morphology', 'Include POS and morphological tags'
|
|
34
|
+
f.option 'pos', '-p', '--pos', 'Include POS tags'
|
|
30
35
|
f.action { |args, options| process(args, options, PROIEL::Converter::TNT) }
|
|
31
36
|
end
|
|
32
37
|
|
|
@@ -79,7 +84,7 @@ module PROIEL
|
|
|
79
84
|
tb = PROIEL::Treebank.new
|
|
80
85
|
|
|
81
86
|
if args.empty?
|
|
82
|
-
STDERR.puts
|
|
87
|
+
STDERR.puts 'Reading from standard input...'.green if options['verbose']
|
|
83
88
|
tb.load_from_xml(STDIN)
|
|
84
89
|
else
|
|
85
90
|
args.each do |filename|
|
|
data/lib/proiel/cli/commands/dictionary.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
module PROIEL
|
|
2
|
+
module Commands
|
|
3
|
+
class Dictionary < Command
|
|
4
|
+
class << self
|
|
5
|
+
def init_with_program(prog)
|
|
6
|
+
prog.command(:dictionary) do |c|
|
|
7
|
+
c.syntax 'dictionary [options] filename(s)'
|
|
8
|
+
c.description 'Build a dictionary'
|
|
9
|
+
|
|
10
|
+
c.option 'glosses', '--merge-glosses glosses.tsv', 'Merge glosses from an external file'
|
|
11
|
+
c.option 'gloss-languages', '--merge-gloss-languages eng,rus', 'Merge glosses from selected languages'
|
|
12
|
+
|
|
13
|
+
c.action { |args, options| process(args, options) }
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def process(args, options)
|
|
18
|
+
tb = PROIEL::Treebank.new
|
|
19
|
+
dict = PROIEL::DictionaryBuilder.new
|
|
20
|
+
|
|
21
|
+
args.each do |filename|
|
|
22
|
+
STDERR.puts "Reading #{filename}...".green if options['verbose']
|
|
23
|
+
|
|
24
|
+
tb.load_from_xml(filename)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if options['glosses']
|
|
28
|
+
languages = (options['gloss-languages'] || 'eng').split(',').map(&:to_sym)
|
|
29
|
+
if File.exist?(options['glosses'])
|
|
30
|
+
dict.add_external_glosses!(options['glosses'], languages)
|
|
31
|
+
else
|
|
32
|
+
STDERR.puts "#{options['glosses']} not found"
|
|
33
|
+
exit 1
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
tb.sources.each do |source|
|
|
38
|
+
dict.add_source!(source)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
dict.to_xml(STDOUT)
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
data/lib/proiel/cli/commands/info.rb
CHANGED
|
@@ -45,7 +45,7 @@ module PROIEL
|
|
|
45
45
|
puts " Printed text: #{pretty_printed_text_info(source)}"
|
|
46
46
|
puts " Electr. text: #{pretty_electronic_text_info(source)}"
|
|
47
47
|
puts " Size: #{n} sentence(s), #{s.token_count} token(s)"
|
|
48
|
-
puts
|
|
48
|
+
puts ' Annotation: %.2f%% reviewed, %.2f%% annotated' % [r, a]
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
|
|
data/lib/proiel/cli/commands/shell.rb
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
module PROIEL
|
|
2
|
+
module Commands
|
|
3
|
+
class Shell < Command
|
|
4
|
+
class << self
|
|
5
|
+
def init_with_program(prog)
|
|
6
|
+
prog.command(:shell) do |c|
|
|
7
|
+
c.syntax 'shell filename(s)'
|
|
8
|
+
c.description 'Launch a shell with the treebank loaded'
|
|
9
|
+
|
|
10
|
+
c.action do |args, options|
|
|
11
|
+
if args.empty?
|
|
12
|
+
STDERR.puts 'Missing filename(s). Use --help for more information.'
|
|
13
|
+
else
|
|
14
|
+
process(args, options)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def process(args, options)
|
|
21
|
+
tb = PROIEL::Treebank.new
|
|
22
|
+
|
|
23
|
+
args.each do |filename|
|
|
24
|
+
STDERR.puts "Reading #{filename}...".green if options['verbose']
|
|
25
|
+
|
|
26
|
+
tb.load_from_xml(filename)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
binding.pry
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
data/lib/proiel/cli/commands/tokenize.rb
CHANGED
|
@@ -12,7 +12,7 @@ module PROIEL
|
|
|
12
12
|
end
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def process(args,
|
|
15
|
+
def process(args, _)
|
|
16
16
|
if args.empty?
|
|
17
17
|
STDERR.puts 'Missing filename. Use --help for more information.'
|
|
18
18
|
exit 1
|
|
@@ -143,7 +143,7 @@ module PROIEL
|
|
|
143
143
|
def read_body(f)
|
|
144
144
|
f.rewind
|
|
145
145
|
|
|
146
|
-
|
|
146
|
+
[].tap do |bdy|
|
|
147
147
|
f.each_line do |l|
|
|
148
148
|
case l
|
|
149
149
|
when /^%/
|
|
data/lib/proiel/cli/commands/visualize.rb
CHANGED
|
@@ -4,40 +4,42 @@ module PROIEL
|
|
|
4
4
|
class << self
|
|
5
5
|
def init_with_program(prog)
|
|
6
6
|
prog.command(:visualize) do |c|
|
|
7
|
-
c.syntax 'visualize
|
|
7
|
+
c.syntax 'visualize [OPTION(S)] FILENAME(S)'
|
|
8
8
|
c.description 'Visualize treebank graphs'
|
|
9
|
-
c.option 'objects', '--objects sentences|divs|sources', 'Objects to visualize (default: sentences)'
|
|
9
|
+
c.option 'objects', '--objects sentences|divs|sources|SENTENCE-ID', 'Objects to visualize (default: sentences)'
|
|
10
10
|
c.option 'format', '--format png|svg|dot', 'Output format (default: svg)'
|
|
11
|
-
c.option 'layout', '--layout classic|linearized|packed', 'Graph layout (default: classic)'
|
|
11
|
+
c.option 'layout', '--layout classic|linearized|packed|modern', 'Graph layout (default: classic)'
|
|
12
12
|
|
|
13
13
|
c.action { |args, options| process(args, options) }
|
|
14
14
|
end
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
+
LAYOUTS = %w(classic linearized packed modern)
|
|
18
|
+
|
|
17
19
|
def process(args, options)
|
|
18
20
|
objects = options['objects'] || 'sentences'
|
|
19
21
|
format = options['format'] || 'svg'
|
|
20
22
|
layout = options['layout'] || 'classic'
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
STDERR.puts
|
|
24
|
+
unless LAYOUTS.include?(layout)
|
|
25
|
+
STDERR.puts 'Invalid layout'
|
|
24
26
|
exit 1
|
|
25
27
|
end
|
|
26
28
|
|
|
27
|
-
if objects != 'sentences' and objects != 'divs' and objects != 'sources'
|
|
28
|
-
STDERR.puts
|
|
29
|
+
if objects != 'sentences' and objects != 'divs' and objects != 'sources' and objects.to_i.to_s != objects
|
|
30
|
+
STDERR.puts 'Invalid object type'
|
|
29
31
|
exit 1
|
|
30
32
|
end
|
|
31
33
|
|
|
32
34
|
if format != 'png' and format != 'svg' and format != 'dot'
|
|
33
|
-
STDERR.puts
|
|
35
|
+
STDERR.puts 'Invalid format'
|
|
34
36
|
exit 1
|
|
35
37
|
end
|
|
36
38
|
|
|
37
39
|
tb = PROIEL::Treebank.new
|
|
38
40
|
|
|
39
41
|
if args.empty?
|
|
40
|
-
STDERR.puts
|
|
42
|
+
STDERR.puts 'Reading from standard input...'.green if options['verbose']
|
|
41
43
|
tb.load_from_xml(STDIN)
|
|
42
44
|
else
|
|
43
45
|
args.each do |filename|
|
|
@@ -50,14 +52,15 @@ module PROIEL
|
|
|
50
52
|
tb.sources.each do |source|
|
|
51
53
|
case objects
|
|
52
54
|
when 'sources'
|
|
53
|
-
puts
|
|
55
|
+
puts 'This can take a very, very long time... Be patient!'
|
|
54
56
|
save_graph layout, format, source
|
|
55
57
|
when 'divs'
|
|
56
58
|
save_graphs source.divs, layout, format, source.id, source.divs.count
|
|
57
59
|
when 'sentences'
|
|
58
60
|
save_graphs source.sentences, layout, format, source.id, source.sentences.count
|
|
59
61
|
else
|
|
60
|
-
|
|
62
|
+
object = tb.find_sentence(objects.to_i)
|
|
63
|
+
save_graph(layout, format, object) if object
|
|
61
64
|
end
|
|
62
65
|
end
|
|
63
66
|
end
|