RubyGems - proiel-cli - Versions diffs - 1.2.1 → 1.3.0 - Mend

proiel-cli 1.2.1 → 1.3.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +5 -5
data/README.md +11 -3
data/bin/proiel +1 -1
data/lib/proiel/cli/commands/build.rb +91 -0
data/lib/proiel/cli/commands/convert.rb +7 -2
data/lib/proiel/cli/commands/dictionary.rb +46 -0
data/lib/proiel/cli/commands/info.rb +1 -1
data/lib/proiel/cli/commands/shell.rb +34 -0
data/lib/proiel/cli/commands/tokenize.rb +2 -2
data/lib/proiel/cli/commands/validate.rb +1 -1
data/lib/proiel/cli/commands/visualize.rb +14 -11
data/lib/proiel/cli/converters/conll-u/morphology.rb +162 -72
data/lib/proiel/cli/converters/conll-u/syntax.rb +108 -62
data/lib/proiel/cli/converters/conll-u.rb +648 -548
data/lib/proiel/cli/converters/conll-x.rb +67 -52
data/lib/proiel/cli/converters/lexc.rb +21 -23
data/lib/proiel/cli/converters/proielxml.rb +173 -132
data/lib/proiel/cli/converters/text.rb +69 -71
data/lib/proiel/cli/converters/tiger.rb +110 -114
data/lib/proiel/cli/converters/tiger2.rb +139 -141
data/lib/proiel/cli/converters/tnt.rb +19 -15
data/lib/proiel/cli/version.rb +1 -1
data/lib/proiel/cli.rb +26 -1
metadata +43 -58
data/bin/setup +0 -8
data/contrib/proiel-tnt-train +0 -15
data/lib/proiel/cli/commands.rb +0 -28

data/lib/proiel/cli/converters/conll-u/morphology.rb CHANGED

@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 module PROIEL
   module Converter
     class CoNLLU
@@ -6,10 +5,10 @@ module PROIEL
       # try to guess deponency based on the lemma
       DEPONENTS = { 'lat' => /r\Z/,
                     'grc' => /ομαι\Z/ }
-      COPULAR_LEMMATA = ['sum,V-,lat', 'εἰμί#1,V-,grc']
+      COPULAR_LEMMATA = ['sum,V-,lat', 'eo#2,V-,lat','εἰμί#1,V-,grc', 'быти,V-,orv','стати#2,V-,orv','бꙑти,V-,chu']
       AUXILIARIES = COPULAR_LEMMATA + []
-      DETERMINERS = ['S-', 'Pd', 'Px']
-      NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
+      DETERMINERS = ['S-', 'Pd', 'Px']
+      NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
                           'μή,Df,grc',
                           'μήγε,Df,grc',
                           'μηδαμῶς,Df,grc',
@@ -39,28 +38,38 @@ module PROIEL
                           'ni,Df,got',
                           'nibai#2,Df,got',
                           'nih,Df,got',
+                          'не,Df,orv',
+                          'ни,Df,orv',
+                          'ниже,Df,orv',
+                          'нѣ,Df,orv',
                          ]
       TAM_PARTICLE_LEMMATA = ['ἄν,Df,grc',
                              ]
       PARTICLE_LEMMATA = [ 'at,Df,lat',
                            'atque,Df,lat',
                            'autem,Df,lat',
                            'certe,Df,lat',
+                           'en,Df,lat',
+                           'equidem,Df,lat',
                            'ergo,Df,lat',
                            'et,Df,lat',
                            'enim,Df,lat',
+                           'etenim,Df,lat',
                            'etiam,Df,lat',
                            'igitur,Df,lat',
                            'immo,Df,lat',
                            'itaque,Df,lat',
                            'nam,Df,lat',
+                           'namque,Df,lat',
                            'nonne,Df,lat',
                            'nonne,Du,lat',
+                           'num,Df,lat',
                            'quidem,Df,lat',
                            'quoque,Df,lat',
                            'sic,Df,lat',
+                           'siquidem,Df,lat',
                            'tamen,Df,lat',
                            'tum,Df,lat',
                            'tunc,Df,lat',
@@ -138,82 +147,162 @@ module PROIEL
                            'þannu,Df,got',
                            'þanuh,Df,got',
                            'þaruh,Df,got',
+                           'али,Df,orv',
+                           'аль,Df,orv',
+                           'ано,Df,orv',
+                           'атъ,Df,orv',
+                           'ать,Df,orv',
+                           'бо,Df,orv',
+                           'вѣдь,Df,orv',
+                           'да#2,Df,orv',
+                           'еда,Df,orv',
+                           'же,Df,orv',
+                           'зане,Df,orv',
+                           'занеже,Df,orv',
+                           'ибо,Df,orv',
+                           'ино,Df,orv',
+                           'ли,Df,orv',
+                           'ну,Df,orv',
+                           'понеже,Df,orv',
+                           'си,Df,orv',
+                           'ти,Df,orv',
+                           'убо,Df,orv',
+                           'ужь,Df,orv',
+                           'ци,Df,orv',
+                           'яко,Df,orv',
+                           'якоже,Df,orv',
                          ]
-      POS_MAP =
-        {
+      COMPARISON_LEMMATA = ['alja,Df,got',
+                            'ar̄awel,Df,xcl',
+                            'atque,Df,lat',
+                            'baycʻ,Df,xcl',
+                            'etʻe,Df,xcl',
+                            'ibrew,Df,xcl',
+                            'ibrew z-,Df,xcl',
+                            'kʻan z,Df,xcl',
+                            'licet,Df,lat',
+                            'nibai,Df,got',
+                            'nisi,Df,lat',
+                            'orpēs,Df,xcl',
+                            'praeterquam,Df,lat',
+                            'quam,Df,lat',
+                            'quasi,Df,lat',
+                            'quemadmodum,Df,lat',
+                            'si,Df,lat',
+                            'sicut,Df,lat',
+                            'swaswe,Df,got',
+                            'swe,Df,got',
+                            'tamquam,Df,lat',
+                            'tʻe,Df,xcl',
+                            'ut,Df,lat',
+                            'velut,Df,lat',
+                            'þau,Df,got',
+                            'ἅτε,Df,grc',
+                            'εἰ,Df,grc',
+                            'ἤ,Df,grc',
+                            'ἤπερ,Df,grc',
+                            'καθάπερ,Df,grc',
+                            'καθώς,Df,grc',
+                            'οἷα,Df,grc',
+                            'ὁμοίως,Df,grc',
+                            'ὅτι,Df,grc',
+                            'ὡς,Df,grc',
+                            'ὡσεί,Df,grc',
+                            'ὥσπερ,Df,grc',
+                            'ако,Df,orv',
+                            'акъже,Df,orv',
+                            'акы,Df,orv',
+                            'акꙑ,Df,chu',
+                            'будьто,Df,orv',
+                            'како,Df,orv',
+                            'ли,Df,chu',
+                            'неже,Df,chu',
+                            'нежели,Df,chu',
+                            'нежели,Df,orv',
+                            'окꙑ,Df,chu',
+                            'развѣ,Df,chu',
+                            'тъкъмо,Df,chu',
+                            'чьто,Df,orv',
+                            'яко,Df,orv',
+                            'якоже,Df,orv',
+                            'ꙗко,Df,chu',
+                            'ꙗкоже,Df,chu'
+                           ]
+      POS_MAP =
+        {
           'A-' => [['ADJ', lambda { |x| true } ]],
           'C-' => [['CCONJ', lambda { |x| true } ]],
-          'Df' => [['AUX', lambda(&:TAM_particle?)],
-                   ['ADV', lambda(&:negation?), "Polarity=Neg"],
+          'Df' => [['AUX', lambda(&:tam_particle?)],
+                   ['ADV', lambda(&:negation?), 'Polarity=Neg'],
                    ['ADV', lambda { |x| true } ]
                   ],
-          'Dq' => [['ADV', lambda { |x| true }, "PronType=Rel"]],
-          'Du' => [['ADV', lambda { |x| true }, "PronType=Int"]],
+          'Dq' => [['ADV', lambda { |x| true }, 'PronType=Rel']],
+          'Du' => [['ADV', lambda { |x| true }, 'PronType=Int']],
           'F-' => [['X', lambda { |x| true } ]],
           'G-' => [['SCONJ', lambda { |x| true } ]],
           'I-' => [['INTJ', lambda { |x| true } ]],
-          'Ma' => [['NUM', lambda { |x| true } ]],
-          'Mo' => [['ADJ', lambda { |x| true } ]],
+          'Ma' => [['NUM', lambda { |x| true } ]],
+          'Mo' => [['ADJ', lambda { |x| true } ]],
           'N-' => [['SCONJ', lambda { |x| true } ]], #irrelevant for our purposes
           'Nb' => [['NOUN', lambda { |x| true } ]],
           'Ne' => [['PROPN', lambda { |x| true } ]],
-          'Pc' => [['PRON', lambda { |x| true }, "PronType=Rcp"]],
-          'Pd' => [['DET', lambda { |x| true } ]],
-          'Pi' => [['PRON', lambda { |x| true }, "PronType=Int"]],
+          'Pc' => [['PRON', lambda { |x| true }, 'PronType=Rcp']],
+          'Pd' => [['DET', lambda { |x| true } ]],
+          'Pi' => [['PRON', lambda { |x| true }, 'PronType=Int']],
           'Pk' => [['AUX', lambda { |x| x.relation == 'aux' }],
-                   ['PRON', lambda { |x| true }, "PronType=Prs|Reflex=Yes"]],
-          'Pp' => [['PRON', lambda { |x| true }, "PronType=Prs"]],
-          'Pr' => [['PRON', lambda { |x| true }, "PronType=Rel"]],
-          'Ps' => [['ADJ', lambda { |x| true }, "Poss=Yes"]],   ###  NB no evidence for a pronominal/determiner-like nature here
-          'Pt' => [['ADJ', lambda { |x| true }, "Poss=Yes|Reflex=Yes" ]],   ###  NB no evidence for a pronominal/determiner-like nature here
-          'Px' => [['DET', lambda { |x| true } ]],
-          'Py' => [['PRON', lambda { |x| true } ]],
+                   ['PRON', lambda { |x| true }, 'PronType=Prs|Reflex=Yes']],
+          'Pp' => [['PRON', lambda { |x| true }, 'PronType=Prs']],
+          'Pr' => [['PRON', lambda { |x| true }, 'PronType=Rel']],
+          'Ps' => [['DET', lambda { |x| true }, 'Poss=Yes']],   ###  NB no evidence for a pronominal/determiner-like nature here
+          'Pt' => [['DET', lambda { |x| true }, 'Poss=Yes|Reflex=Yes' ]],   ###  NB no evidence for a pronominal/determiner-like nature here
+          'Px' => [['DET', lambda { |x| true } ]],
+          'Py' => [['PRON', lambda { |x| true } ]],
           'R-' => [['ADP', lambda { |x| true } ]],
           'V-' => [['AUX', lambda(&:auxiliary?)],
                    ['VERB', lambda { |x| true } ]],
-          'S-' => [['DET', lambda { |x| true }, "Definite=Def|PronType=Dem"]], # (we only have definite articles)
+          'S-' => [['DET', lambda { |x| true }, 'Definite=Def|PronType=Dem']], # (we only have definite articles)
           'X-' => [['X', lambda { |x| true } ]]
-                  }
+      }
       MORPHOLOGY_MAP = {
-        :person => {'1' => 'Person=1',
-                    '2' => 'Person=2',
-                    '3' => 'Person=3'  } ,
-        :number => {'s' => 'Number=Sing',
-                    'd' => 'Number=Dual',
+        :person => {'1' => 'Person=1',
+                    '2' => 'Person=2',
+                    '3' => 'Person=3'  } ,
+        :number => {'s' => 'Number=Sing',
+                    'd' => 'Number=Dual',
                     'p' => 'Number=Plur'  } ,
-        :tense  => {'p' => 'Tense=Pres',
-                    'i' => 'Tense=Past|Aspect=Imp',
-                    'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
-                    's' => 'Aspect=Res',
+        :tense  => {'p' => 'Tense=Pres',
+                    'i' => 'Tense=Past|Aspect=Imp',
+                    'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
+                    's' => 'VerbForm=PartRes|Tense=Past',
                     # tags Perf is not universal
-                    'a' => 'Tense=Past|Aspect=Perf',
-                    'u' => 'Tense=Past',
-                    'l' => 'Tense=Pqp',
-                    'f' => 'Tense=Fut',
+                    'a' => 'Tense=Past|Aspect=Perf',
+                    'u' => 'Tense=Past',
+                    'l' => 'Tense=Pqp',
+                    'f' => 'Tense=Fut',
                     # tag FutPerfect is not universal
-                    't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
+                    't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
                       },
-        :mood =>   {'i' => 'VerbForm=Fin|Mood=Ind',
-                    's' => 'VerbForm=Fin|Mood=Sub',
-                    'm' => 'VerbForm=Fin|Mood=Imp',
-                    'o' => 'VerbForm=Fin|Mood=Opt',
-                    'n' => 'VerbForm=Inf',
-                    'p' => 'VerbForm=Part',
-                    'd' => 'VerbForm=Ger',
+        :mood =>   {'i' => 'VerbForm=Fin|Mood=Ind',
+                    's' => 'VerbForm=Fin|Mood=Sub',
+                    'm' => 'VerbForm=Fin|Mood=Imp',
+                    'o' => 'VerbForm=Fin|Mood=Opt',
+                    'n' => 'VerbForm=Inf',
+                    'p' => 'VerbForm=Part',
+                    'd' => 'VerbForm=Ger',
                     # Gdv (gerundive) is not universal
-                    'g' => 'VerbForm=Gdv',
-                    'u' => 'VerbForm=Sup',
-                    'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
-                    'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
-                    'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
+                    'g' => 'VerbForm=Gdv',
+                    'u' => 'VerbForm=Sup',
+                    'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
+                    'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
+                    'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
                     't' => 'VerbForm=Fin' },
-        :voice =>  {'a' => 'Voice=Act',
+        :voice =>  {'a' => 'Voice=Act',
                     # Med is not universal
-                    'm' => 'Voice=Mid',
-                    'p' => 'Voice=Pass',
+                    'm' => 'Voice=Mid',
+                    'p' => 'Voice=Pass',
                     'e' => 'Voice=Mid,Pass' },
         :gender => {'m' => 'Gender=Masc',
                     'f' => 'Gender=Fem',
@@ -221,27 +310,28 @@ module PROIEL
                     'p' => 'Gender=Fem,Masc',
                     'o' => 'Gender=Masc,Neut',
                     'r' => 'Gender=Fem,Neut' },
-        :case =>   {'n' => 'Case=Nom',
-                    'a' => 'Case=Acc',
+        :case =>   {'n' => 'Case=Nom',
+                    'a' => 'Case=Acc',
                     # Obl(ique) is not universal
-                    'o' => 'Case=Obl',
-                    'g' => 'Case=Gen',
-                    'c' => 'Case=Dat,Gen',
-                    'e' => 'Case=Acc,Dat',
-                    'd' => 'Case=Dat',
-                    'b' => 'Case=Abl',
-                    'i' => 'Case=Ins',
-                    'l' => 'Case=Loc',
+                    'o' => 'Case=Obl',
+                    'g' => 'Case=Gen',
+                    'c' => 'Case=Dat,Gen',
+                    'e' => 'Case=Acc,Dat',
+                    'd' => 'Case=Dat',
+                    'b' => 'Case=Abl',
+                    'i' => 'Case=Ins',
+                    'l' => 'Case=Loc',
                     'v' => 'Case=Voc' },
-        :degree => {'p' => 'Degree=Pos',
-                    'c' => 'Degree=Cmp',
+        :degree => {'p' => 'Degree=Pos',
+                    'c' => 'Degree=Cmp',
                     's' => 'Degree=Sup' },
         # The whole strength category is not universal
-        :strength => {'w' => 'Strength=Weak',
-                      's' => 'Strength=Strong'},
+        :strength => {'s' => 'Strength=Strong',
+                      'w' => 'Strength=Weak' },
         :inflection => {},
       }
     end
   end
 end

data/lib/proiel/cli/converters/conll-u/syntax.rb CHANGED

@@ -2,86 +2,132 @@ module PROIEL
   module Converter
     class CoNLLU
-      OBLIQUENESS_HIERARCHY = ["nsubj", "obj", "iobj", "obl", "advmod", "csubj", "xcomp", "ccomp", "advcl"]
+      OBLIQUENESS_HIERARCHY = ['nsubj', 'obj', 'iobj', 'obl', 'advmod', 'csubj', 'xcomp', 'ccomp', 'advcl']
+      REL_TO_POS = {
+        'acl' => 'VERB',
+        'advcl' => 'VERB',
+        'advcl:cmp' => 'NOUN',
+        'advmod' => 'ADV',
+        'amod' => 'ADJ',
+        'appos' => 'NOUN',
+        'ccomp' => 'VERB',
+        'conj' => 'X',
+        'csubj' => 'VERB',
+        'csubj:pass' => 'NOUN',
+        'dep' => 'X',
+        'det' => 'DET',
+        'dislocated' => 'X',
+        'fixed' => 'X',
+        'flat:foreign' => 'X',
+        'flat:name' => 'PROPN',
+        'nmod' => 'NOUN',
+        'nsubj' => 'NOUN',
+        'nsubj:pass' => 'NOUN',
+        'nsubj:outer' => 'NOUN',
+        'nummod' => 'NUM',
+        'obj' => 'NOUN',
+        'obl' => 'NOUN',
+        'obl:agent' => 'NOUN',
+        'obl:arg' => 'NOUN',
+        'orphan' => 'NOUN',
+        'parataxis' => 'VERB',
+        'root' => 'VERB',
+        'vocative' => 'NOUN',
+        'xcomp' => 'VERB'
+       }
       RELATION_MAPPING = {
-        "adnom" => "dep",
-        "adv" =>  [["advcl", lambda(&:clausal?) ],
-                   ["advmod", lambda { |x| x.adverb? or x.preposition? } ],
-                   ["advmod", lambda(&:adjectival?) ], # adjective for adverb
-                   ["obl", lambda(&:nominal?) ],
-                   ["advmod", lambda { |x| true } ],
+        'adnom' => 'dep',
+        'adv' =>  [['advcl', lambda(&:clausal?) ],
+                   ['advmod', lambda { |x| x.adverb? } ],
+                   ['advmod', lambda(&:adjectival?) ], # adjective for adverb
+                   ['obl', lambda { |x| x.nominal? or x.preposition? or x.has_preposition? } ],
+                   ['advcl', lambda(&:subjunction?) ],
+                   ['obl', lambda { |x| true } ],
                   ],
-        "ag" => "obl:agent", # add :agent" once defined
-        "apos" => [["flat:name", lambda { |x| x.proper_noun? and x.head and x.head.proper_noun? } ],
-                   ["appos", lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.nominal? } ],
-                   ["acl", lambda { |x| x.clausal? and x.head and x.head.nominal? } ],  # add :relcl ?
-                   # what to do about sentential appositions?
-                   ["advcl", lambda(&:clausal?) ],
-                   ["appos", lambda { |x| true } ],
+        'ag' => 'obl:agent', # add :agent' once defined
+        'apos' => [['flat:name', lambda { |x| x.proper_noun? and x.head and x.head.proper_noun? } ],
+                   ['acl', lambda { |x| x.clausal? and x.head and x.head.nominal? } ],  # add :relcl ?
+                   ['appos', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.nominal? } ],
+                   ['parataxis', lambda { |x| x.clausal? and x.head and x.head.clausal? } ],
+                   # what to do about sentential appositions? attempt here to make them parataxis, but there are some legitimate nominal appos under root nominals, so overgenerates slightly
+                   ['advcl', lambda(&:clausal?) ],
+                   ['appos', lambda { |x| true } ],
                   ],
-        "arg" => "dep",
-        "atr" => [["nummod", lambda(&:cardinal?) ],
-                  ["det", lambda { |x| x.pronominal? and !(!x.genitive? and x.head and x.head.genitive?) } ], #TODO check
-                  ["nmod", lambda(&:nominal?) ],
-                  ["acl", lambda { |x| x.clausal? } ],  # add :relcl?
-                  ["advmod", lambda { |x| x.head and x.head.clausal? } ],
-                  ["amod", lambda { |x| true } ], #default
+        'arg' => 'dep',
+        'atr' => [['nummod', lambda(&:cardinal?) ],
+                  ['det', lambda { |x| x.pronominal? and !x.clausal? and !(!x.genitive? and x.head and x.head.genitive?) } ], #TODO check
+                  ['acl', lambda { |x| x.clausal? } ],  # add :relcl?
+                  ['nmod', lambda(&:nominal?) ],
+                  ['advmod', lambda { |x| x.head and !x.head.nominal? and x.head.clausal? } ],
+                  ['amod', lambda { |x| true } ], #default
                  ],
-        "aux" => [["det", lambda(&:determiner?) ],
-                  ["aux:pass", lambda { |x| x.clausal? and x.head.passive?  } ],
-                  ["aux", lambda(&:clausal?) ], #v2 probably want the modal particle an to go here too in
-                  ["advmod", lambda(&:negation?) ],
-                  ["discourse", lambda { |x| x.particle? or x.interjection? } ],
-                  # include subjunctions that are aux here; (root sentences with subjunction)
-                  ["advmod", lambda { |x| x.adjectival? or x.adverb? or x.subjunction? } ],
-                  ["cc", lambda(&:conjunction?) ],
-                  ["flat:foreign", lambda(&:foreign?) ],
+        'aux' => [['det', lambda(&:determiner?) ],
+                  ['fixed', lambda { |x| x.head and x.head.subjunction? } ],
+                  ['fixed', lambda { |x| x.head and x.head.conjunction? } ],
+                  ['fixed', lambda { |x| x.head and x.head.adverb? and x.relative? } ],
+                  ['fixed', lambda { |x| x.head and x.head.pronominal? and x.verb? } ],
+                  ['aux:pass', lambda { |x| x.clausal? and x.head.passive?  } ],
+                  ['aux', lambda(&:clausal?) ], #v2 probably want the modal particle an to go here too in
+                  ['advmod', lambda(&:negation?) ],
+                  ['discourse', lambda { |x| x.particle? or x.interjection? } ],
+                  ['advmod', lambda { |x| x.adjectival? or x.adverb? } ],
+                  # make subjunctions in root sentences "mark"
+                  ['mark', lambda { |x| x.subjunction? } ],
+                  ['cc', lambda(&:conjunction?) ],
+                  ['flat:foreign', lambda(&:foreign?) ],
                   # We need some more distinctions to get Gothic and Armenian. Introduce language in the treebank? (Read from xml)
-                  ["mark", lambda { |x| ['R-'].include? x.part_of_speech  } ], #'R-' as infinitive marker in Gothic
-                  ["aux", lambda { |x| ['Pk' ].include? x.part_of_speech  } ], #reflexive as valency reducer
+                  ['mark', lambda { |x| ['R-'].include? x.part_of_speech  } ], #"R-" as infinitive marker in Gothic
+                  ['expl:pv', lambda { |x| ['Pk' ].include? x.part_of_speech  } ], #reflexive as valency reducer
                   ['amod', lambda { |x| x.preposition? } ], # Armenian DOM
                   ['fixed', lambda { |x| ['Px', 'Pr'].include? x.part_of_speech } ], # NB there are a lot of bogus annotations with 'Px'
                   # MISANNOTATION  IF A NOUN or a 'Pi' or a 'Pp' or a 'Ps'
                  ],
-        "comp" => [['csubj:pass', lambda { |x| x.head and x.head.passive? } ],
-                   ['csubj', lambda { |x| x.head and x.head.copula? } ],
+        'comp' => [['csubj:pass', lambda { |x| x.head and x.head.passive? and !x.head.has_subject?} ],
+                   ['csubj', lambda { |x| x.head and x.head.has_copula? and !x.head.has_subject?} ],
                    ['ccomp', lambda { |x| true } ],
                   ],
-        "expl" => "expl",
-        "narg" => [['acl', lambda(&:clausal?) ],
-                   ['nmod', lambda(&:nominal?) ],
+        'expl' => 'expl',
+        'narg' => [['acl', lambda(&:clausal?) ],
+                   ['nmod', lambda(&:nominal?) ],
                    ['nmod', lambda(&:adjectival?) ], # nominaliezed in this function
                    ['nmod', lambda { |x| true } ],
                   ],
-        "nonsub" => "dep",
-        "obj" => "obj:dir",
-        "obl" => [# normally a preposition will be subordinate to its noun, this captures adverbial use of prepositions
-                  ["advmod", lambda { |x| x.adverb? or x.preposition? } ],
-                  ["obl", lambda { |x| x.has_preposition? } ],
-                  ["iobj", lambda(&:nominal?) ],# if nominal (NB check for presence of article!) TODO: should be "obj" if the verb is monovalent (even by elision)
-                  ["iobj", lambda(&:adjectival?) ], # OBL adjectives are nominalized
-                  ["advcl", lambda(&:clausal?) ], # this seems to happen with ad libros legendos etc. but check closer!
-                  ["iobj", lambda { |x| true } ],
+        'nonsub' => 'dep',
+        'obj' => 'obj',
+        'obl' => [# normally a preposition will be subordinate to its noun, this captures adverbial use of prepositions
+                  ['advmod', lambda { |x| x.adverb? } ],
+                  ['obl', lambda { |x| x.has_preposition? or x.preposition? } ],
+                  ['obl', lambda { |x| x.head and x.head.adverb? } ],
+                  ['obl:arg', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.clausal? } ],# if nominal (NB check for presence of article!) TODO: should be 'obj' if the verb is monovalent (even by elision)
+                  #['obl:arg', lambda(&:adjectival?) ], # OBL adjectives are nominalized
+                  ['advcl', lambda(&:clausal?) ], # this seems to happen with ad libros legendos etc. but check closer!
+                  ['obl', lambda { |x| true } ],
                  ],
-        "parpred" => "parataxis",
-        "part" => "nmod",
-        "per" => "dep",
-        "pid" => ["ERROR", lambda { |x| raise "Remaining pid edge!" } ],
-        "pred" => [["root", lambda(&:root?) ],
-                   ["ERROR", lambda { |x| raise "#{x.to_n} (head_id #{x.head_id}) is not a root!" }],
+        'parpred' => 'parataxis',
+        'part' => 'nmod',
+        'per' => 'dep',
+        'pid' => ['ERROR', lambda { |x| raise 'Remaining pid edge!' } ],
+        'pred' => [['root', lambda(&:root?) ],
+                   ['ERROR', lambda { |x| raise "#{x.to_n} (head_id #{x.head_id}) is not a root!" }],
                   ],
-        "rel" => "acl", # add :relcl?
-        "sub" => [["nsubj:pass", lambda { |x| x.head and x.head.passive? } ],
-                  ["nsubj", lambda { |x| true }],
+        'rel' => 'acl', # add :relcl?
+        'sub' => [['nsubj:pass', lambda { |x| x.head and x.head.passive? } ],
+                  #['obl', lambda { |x| x.head and x.head.part_of_speech == 'Df' } ],
+                  ['nsubj', lambda { |x| true }],
+                 ],
+        'voc' => [['discourse', lambda { |x| x.part_of_speech == 'I-' } ],
+                  ['vocative', lambda { |x| true } ],
                  ],
-        "voc" => "vocative",
-        "xadv" => [["advcl", lambda(&:clausal?)], #add :contr ?
-                   ["advmod", lambda { |x| true } ], # add :contr ?
+        'xadv' => [['advcl', lambda(&:clausal?)], #add :contr ?
+                   ['xcomp', lambda { |x| x.nominal? or x.pronominal? or x.cardinal?} ],
+                   ['advcl', lambda(&:subjunction?)],
+                   ['advmod', lambda { |x| true } ], # add :contr ?
                   ],
-        "xobj" => "xcomp", # copula cases have already been taken care of
-        "xsub" => "xsub",
+        'xobj' => 'xcomp', # copula cases have already been taken care of
+        'xsub' => 'xsub',
       }
     end
   end