proiel-cli 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 98027bdd669bde3fa19db4f6159e3b36d5024f1f
4
- data.tar.gz: 4e6019add4436629e2488fc03d19f3d040c48149
2
+ SHA256:
3
+ metadata.gz: 7f03b2148dc4a7dff5176d490e8850507b4984df13dafd56b04dd81763708df7
4
+ data.tar.gz: 8e41729b4ab79c7f48c8b34f77ac65a58188e950879c473f41dcc1c74af17d4d
5
5
  SHA512:
6
- metadata.gz: 301a294838fb2ee805bcabbdb5425d39815749c8d4ccea9e8f7883ccaee4fee09ffb0cd45ad8ea0bef06cfb7b80a241b4acb87d224079101863be2d8f0614c7a
7
- data.tar.gz: ca1bbaadca8be10e714984ab8ae1f460e6c61d3e5c7b09882c8819e95aadcccc152c69e53e1bd0072be6fc30491c422a84c6886fbd4f7f203b17cfa517f92310
6
+ metadata.gz: 838988757f5ee2360496297516047fe2715da30c8327aa411b6d3f395284d02a33123c263c238874e596aedcb53bbe9aeb79d63363fd712cb3480357c54bf87a
7
+ data.tar.gz: 1f629b9b036776f23a0c80851ea80f69082e8f2df8675ed268e0afdc86ac2e94264fecbaedd686a087475cc29e2abb7d99b090a4179c48c62516c402ea5179b5
data/README.md CHANGED
@@ -1,10 +1,17 @@
1
1
  # PROIEL command-line interface
2
2
 
3
+ ## Status
4
+
5
+ [![Gem Version](https://badge.fury.io/rb/proiel-cli.svg)](http://badge.fury.io/rb/proiel-cli)
6
+ [![Build Status](https://secure.travis-ci.org/proiel/proiel-cli.svg?branch=master)](http://travis-ci.org/proiel/proiel-cli?branch=master)
7
+
8
+ ## Description
9
+
3
10
  This is a command-line interface for manipulating PROIEL treebanks.
4
11
 
5
12
  ## Installation
6
13
 
7
- This library requires Ruby >= 2.1. Install as
14
+ This library requires Ruby >= 2.4. Install as
8
15
 
9
16
  ```shell
10
17
  gem install proiel-cli
@@ -35,8 +42,16 @@ Bug reports and pull requests are welcome on [GitHub](https://github.com/proiel/
35
42
 
36
43
  ## Development
37
44
 
38
- To contribute to development, check out the git repository from [GitHub](https://github.com/proiel/proiel-cli) and run `bin/setup` to install all development dependencies. Then run `rake` to run the tests.
45
+ To contribute to development, check out the git repository from [GitHub](https://github.com/proiel/proiel-cli) and run `bundle install` to install all development dependencies. Then run `rake` to run the tests.
39
46
 
40
47
  To install a development version of this gem, run `bundle exec rake install`.
41
48
 
42
- To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the gem to [rubygems.org](https://rubygems.org).
49
+ ### Releasing a new version
50
+
51
+ To release a new version:
52
+
53
+ 1. Update the version number in `lib/proiel/cli/version.rb`.
54
+ 2. Run `bundle exec rake release`. This will:
55
+ * Create a git tag for the version.
56
+ * Push git commits and tags to the remote repository.
57
+ * Push the `.gem` file to [rubygems.org](https://rubygems.org).
data/bin/proiel CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  STDOUT.sync = true
3
3
 
4
- $:.unshift File.join(File.dirname(__FILE__), *%w{ .. lib })
4
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
5
5
 
6
6
  require 'proiel/cli'
7
7
 
@@ -0,0 +1,91 @@
1
module PROIEL
  module Commands
    # Implements the `build` command, which derives dictionaries from
    # treebank data. It has two subcommands:
    #
    # * `build dictionary output_filename [filename(s)]` writes a single
    #   dictionary to `output_filename`.
    # * `build dictionaries [filename(s)]` writes one dictionary per source
    #   language to `<language>.xml` in the current directory.
    #
    # Both subcommands read PROIEL XML from standard input when no input
    # filenames are given.
    class Build < Command
      class << self
        # Registers the `build` command and its subcommands on `prog`.
        #
        # Invoking `build` without a recognised subcommand prints an error
        # on standard error and exits with status 1.
        def init_with_program(prog)
          prog.command(:build) do |c|
            c.syntax 'build resource'
            c.description 'Build a derived resource'

            c.command(:dictionary) do |f|
              f.syntax 'output_filename [filename(s)]'
              f.description 'Build a dictionary from treebank data'
              f.action { |args, options| process_dictionary(args, options) }
            end

            c.command(:dictionaries) do |f|
              f.syntax '[filename(s)]'
              f.description 'Build multiple dictionaries (one per language) from treebank data'
              f.action { |args, options| process_dictionaries(args, options) }
            end

            c.action do |_, _|
              STDERR.puts 'Missing or invalid format. Use --help for more information.'
              exit 1
            end
          end
        end

        # Builds one dictionary from all loaded sources and writes it as XML.
        #
        # args    - the output filename followed by zero or more input
        #           filenames; exits with status 1 if empty.
        # options - command options; only 'verbose' is consulted.
        def process_dictionary(args, options)
          if args.empty?
            STDERR.puts 'Missing output filename. Use --help for more information.'
            exit 1
          end

          output_filename, *input_filenames = args

          tb = load_treebank(input_filenames, options)

          # Each source is added exactly once, regardless of whether the
          # treebank came from standard input or from files.
          dict = PROIEL::Dictionary::Builder.new
          tb.sources.each { |source| dict.add_source!(source) }

          File.open(output_filename, 'w') { |f| dict.to_xml(f) }
        end

        # Builds one dictionary per source language and writes each one to
        # `<language>.xml` in the current working directory.
        #
        # args    - zero or more input filenames.
        # options - command options; only 'verbose' is consulted.
        def process_dictionaries(args, options)
          tb = load_treebank(args, options)

          # Sources sharing a language are merged into the same builder.
          dicts = {}
          tb.sources.each do |source|
            dicts[source.language] ||= PROIEL::Dictionary::Builder.new
            dicts[source.language].add_source!(source)
          end

          dicts.each do |language, dict|
            File.open("#{language}.xml", 'w') { |f| dict.to_xml(f) }
          end
        end

        private

        # Loads the given files into a new treebank, or reads standard input
        # when `filenames` is empty. Progress messages go to standard error
        # when options['verbose'] is set. Returns the treebank.
        def load_treebank(filenames, options)
          tb = PROIEL::Treebank.new

          if filenames.empty?
            STDERR.puts 'Reading from standard input...'.green if options['verbose']

            tb.load_from_xml(STDIN)
          else
            filenames.each do |filename|
              STDERR.puts "Reading #{filename}...".green if options['verbose']

              tb.load_from_xml(filename)
            end
          end

          tb
        end
      end
    end
  end
end
@@ -12,6 +12,8 @@ module PROIEL
12
12
  f.description 'Convert to PROIEL XML format'
13
13
  f.option 'remove-not-annotated', '--remove-not-annotated', 'Remove sentences that have not been annotated'
14
14
  f.option 'remove-not-reviewed', '--remove-not-reviewed', 'Remove sentences that have not been reviewed'
15
+ f.option 'remove-annotated', '--remove-annotated', 'Remove sentences that have been annotated'
16
+ f.option 'remove-reviewed', '--remove-reviewed', 'Remove sentences that have been reviewed'
15
17
  f.option 'remove-morphology', '--remove-morphology', 'Remove morphological annotation (part of speech, morphology and lemma)'
16
18
  f.option 'remove-syntax', '--remove-syntax', 'Remove syntactic annotation (relation, head ID and slashes)'
17
19
  f.option 'remove-information-structure', '--remove-information-structure', 'Remove informtion structure annotation (antecedent ID, information status and contrast group)'
@@ -20,13 +22,16 @@ module PROIEL
20
22
  f.option 'remove-annotator', '--remove-annotator', 'Remove annotator information'
21
23
  f.option 'remove-reviewer', '--remove-reviewer', 'Remove reviewer information'
22
24
  f.option 'remove-empty-divs', '--remove-empty-divs', 'Remove div elements that do not contain any sentences'
25
+ f.option 'infer-alignments', '--infer-alignments', 'Add inferred alignments when possible'
26
+ f.option 'remove-unaligned-sources', '--remove-unaligned-sources', 'Remove sources that are not aligned'
23
27
  f.action { |args, options| process(args, options, PROIEL::Converter::PROIELXML) }
24
28
  end
25
29
 
26
30
  c.command(:tnt) do |f|
27
31
  f.syntax '[options] filename(s)'
28
32
  f.description 'Convert to TNT/hunpos format'
29
- f.option 'morphology', '-m', '--morphology', 'Include morphological tags'
33
+ f.option 'morphology', '-m', '--morphology', 'Include POS and morphological tags'
34
+ f.option 'pos', '-p', '--pos', 'Include POS tags'
30
35
  f.action { |args, options| process(args, options, PROIEL::Converter::TNT) }
31
36
  end
32
37
 
@@ -79,7 +84,7 @@ module PROIEL
79
84
  tb = PROIEL::Treebank.new
80
85
 
81
86
  if args.empty?
82
- STDERR.puts "Reading from standard input...".green if options['verbose']
87
+ STDERR.puts 'Reading from standard input...'.green if options['verbose']
83
88
  tb.load_from_xml(STDIN)
84
89
  else
85
90
  args.each do |filename|
@@ -0,0 +1,46 @@
1
module PROIEL
  module Commands
    # Implements the `dictionary` command, which builds a dictionary from
    # one or more treebank files and prints it as XML on standard output.
    class Dictionary < Command
      class << self
        # Registers the `dictionary` command and its gloss-merging options
        # on `prog`.
        def init_with_program(prog)
          prog.command(:dictionary) do |c|
            c.syntax 'dictionary [options] filename(s)'
            c.description 'Build a dictionary'

            c.option 'glosses', '--merge-glosses glosses.tsv', 'Merge glosses from an external file'
            c.option 'gloss-languages', '--merge-gloss-languages eng,rus', 'Merge glosses from selected languages'

            c.action { |args, options| process(args, options) }
          end
        end

        # Loads every file in `args`, optionally merges external glosses,
        # adds all sources to the dictionary and writes it to standard output.
        #
        # options - consulted keys are 'verbose', 'glosses' (path to the
        #           external gloss file) and 'gloss-languages' (a
        #           comma-separated list, 'eng' when absent).
        #
        # Exits with status 1 if the gloss file does not exist.
        def process(args, options)
          treebank = PROIEL::Treebank.new
          # NOTE(review): presumably PROIEL::DictionaryBuilder is provided by
          # the proiel library; confirm that the constant exists.
          builder = PROIEL::DictionaryBuilder.new

          args.each do |filename|
            STDERR.puts "Reading #{filename}...".green if options['verbose']

            treebank.load_from_xml(filename)
          end

          if (glosses_path = options['glosses'])
            languages = (options['gloss-languages'] || 'eng').split(',').map(&:to_sym)

            unless File.exist?(glosses_path)
              STDERR.puts "#{glosses_path} not found"
              exit 1
            end

            builder.add_external_glosses!(glosses_path, languages)
          end

          treebank.sources.each { |source| builder.add_source!(source) }

          builder.to_xml(STDOUT)
        end
      end
    end
  end
end
@@ -45,7 +45,7 @@ module PROIEL
45
45
  puts " Printed text: #{pretty_printed_text_info(source)}"
46
46
  puts " Electr. text: #{pretty_electronic_text_info(source)}"
47
47
  puts " Size: #{n} sentence(s), #{s.token_count} token(s)"
48
- puts " Annotation: %.2f%% reviewed, %.2f%% annotated" % [r, a]
48
+ puts ' Annotation: %.2f%% reviewed, %.2f%% annotated' % [r, a]
49
49
  end
50
50
  end
51
51
 
@@ -0,0 +1,34 @@
1
module PROIEL
  module Commands
    # Implements the `shell` command, which loads one or more treebank files
    # and then opens an interactive session with the treebank in scope.
    class Shell < Command
      class << self
        # Registers the `shell` command on `prog`.
        #
        # Without any filenames the command prints an error on standard error
        # and exits with status 1, so that scripts can detect the failure.
        def init_with_program(prog)
          prog.command(:shell) do |c|
            c.syntax 'shell filename(s)'
            c.description 'Launch a shell with the treebank loaded'

            c.action do |args, options|
              if args.empty?
                STDERR.puts 'Missing filename(s). Use --help for more information.'
                exit 1
              end

              process(args, options)
            end
          end
        end

        # Loads every file in `args` into a new treebank and then calls
        # `binding.pry`, leaving the treebank available as the local `tb`.
        #
        # options - command options; only 'verbose' is consulted.
        #
        # NOTE(review): `binding.pry` assumes that Pry has been required
        # elsewhere; confirm against the gem's top-level requires.
        def process(args, options)
          tb = PROIEL::Treebank.new

          args.each do |filename|
            STDERR.puts "Reading #{filename}...".green if options['verbose']

            tb.load_from_xml(filename)
          end

          binding.pry
        end
      end
    end
  end
end
@@ -12,7 +12,7 @@ module PROIEL
12
12
  end
13
13
  end
14
14
 
15
- def process(args, options)
15
+ def process(args, _)
16
16
  if args.empty?
17
17
  STDERR.puts 'Missing filename. Use --help for more information.'
18
18
  exit 1
@@ -143,7 +143,7 @@ module PROIEL
143
143
  def read_body(f)
144
144
  f.rewind
145
145
 
146
- Array.new.tap do |bdy|
146
+ [].tap do |bdy|
147
147
  f.each_line do |l|
148
148
  case l
149
149
  when /^%/
@@ -10,7 +10,9 @@ module PROIEL
10
10
  end
11
11
  end
12
12
 
13
- def process(args, options)
13
+ def process(args, _)
14
+ exit_code = 0
15
+
14
16
  if args.empty?
15
17
  STDERR.puts 'Missing filename(s). Use --help for more information.'
16
18
  exit 1
@@ -23,8 +25,6 @@ module PROIEL
23
25
 
24
26
  if v.valid?
25
27
  puts "#{filename} is valid".green
26
-
27
- exit 0
28
28
  else
29
29
  puts "#{filename} is invalid".red
30
30
 
@@ -32,9 +32,11 @@ module PROIEL
32
32
  puts "* #{error}"
33
33
  end
34
34
 
35
- exit 1
35
+ exit_code = 1
36
36
  end
37
37
  end
38
+
39
+ exit exit_code
38
40
  end
39
41
  end
40
42
  end
@@ -4,40 +4,42 @@ module PROIEL
4
4
  class << self
5
5
  def init_with_program(prog)
6
6
  prog.command(:visualize) do |c|
7
- c.syntax 'visualize sentences|divs|sources FILENAME(S)'
7
+ c.syntax 'visualize [OPTION(S)] FILENAME(S)'
8
8
  c.description 'Visualize treebank graphs'
9
- c.option 'objects', '--objects sentences|divs|sources', 'Objects to visualize (default: sentences)'
9
+ c.option 'objects', '--objects sentences|divs|sources|SENTENCE-ID', 'Objects to visualize (default: sentences)'
10
10
  c.option 'format', '--format png|svg|dot', 'Output format (default: svg)'
11
- c.option 'layout', '--layout classic|linearized|packed', 'Graph layout (default: classic)'
11
+ c.option 'layout', '--layout classic|linearized|packed|modern', 'Graph layout (default: classic)'
12
12
 
13
13
  c.action { |args, options| process(args, options) }
14
14
  end
15
15
  end
16
16
 
17
+ LAYOUTS = %w(classic linearized packed modern)
18
+
17
19
  def process(args, options)
18
20
  objects = options['objects'] || 'sentences'
19
21
  format = options['format'] || 'svg'
20
22
  layout = options['layout'] || 'classic'
21
23
 
22
- if layout != 'classic' and layout != 'linearized' and layout != 'packed'
23
- STDERR.puts "Invalid layout"
24
+ unless LAYOUTS.include?(layout)
25
+ STDERR.puts 'Invalid layout'
24
26
  exit 1
25
27
  end
26
28
 
27
- if objects != 'sentences' and objects != 'divs' and objects != 'sources'
28
- STDERR.puts "Invalid object type"
29
+ if objects != 'sentences' and objects != 'divs' and objects != 'sources' and objects.to_i.to_s != objects
30
+ STDERR.puts 'Invalid object type'
29
31
  exit 1
30
32
  end
31
33
 
32
34
  if format != 'png' and format != 'svg' and format != 'dot'
33
- STDERR.puts "Invalid format"
35
+ STDERR.puts 'Invalid format'
34
36
  exit 1
35
37
  end
36
38
 
37
39
  tb = PROIEL::Treebank.new
38
40
 
39
41
  if args.empty?
40
- STDERR.puts "Reading from standard input...".green if options['verbose']
42
+ STDERR.puts 'Reading from standard input...'.green if options['verbose']
41
43
  tb.load_from_xml(STDIN)
42
44
  else
43
45
  args.each do |filename|
@@ -50,14 +52,15 @@ module PROIEL
50
52
  tb.sources.each do |source|
51
53
  case objects
52
54
  when 'sources'
53
- puts "This can take a very, very long time... Be patient!"
55
+ puts 'This can take a very, very long time... Be patient!'
54
56
  save_graph layout, format, source
55
57
  when 'divs'
56
58
  save_graphs source.divs, layout, format, source.id, source.divs.count
57
59
  when 'sentences'
58
60
  save_graphs source.sentences, layout, format, source.id, source.sentences.count
59
61
  else
60
- raise
62
+ object = tb.find_sentence(objects.to_i)
63
+ save_graph(layout, format, object) if object
61
64
  end
62
65
  end
63
66
  end