opener-kaf-naf-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +674 -0
- data/README.md +46 -0
- data/bin/kaf-naf-parser +8 -0
- data/bin/kaf-naf-parser-server +10 -0
- data/bin/kaf-to-naf +7 -0
- data/bin/naf-to-kaf +7 -0
- data/config.ru +4 -0
- data/core/kaf-naf-parser.py +42 -0
- data/core/packages/KafNafParser-1.2.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +22 -0
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +47 -0
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +338 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +80 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +300 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.py +71 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +200 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.py +15 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.py +90 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +14 -0
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +23 -0
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/kaf_naf_parser.rb +77 -0
- data/lib/opener/kaf_naf_parser/cli.rb +92 -0
- data/lib/opener/kaf_naf_parser/public/markdown.css +284 -0
- data/lib/opener/kaf_naf_parser/server.rb +16 -0
- data/lib/opener/kaf_naf_parser/version.rb +5 -0
- data/lib/opener/kaf_naf_parser/views/index.erb +103 -0
- data/lib/opener/kaf_naf_parser/views/result.erb +15 -0
- data/opener-kaf-naf-parser.gemspec +38 -0
- data/pre_build_requirements.txt +3 -0
- metadata +283 -0
data/README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
kaf-naf-parser
|
|
2
|
+
==============
|
|
3
|
+
|
|
4
|
+
This library converts KAF to NAF and NAF to KAF. It also contains a webservice
|
|
5
|
+
for doing exactly this.
|
|
6
|
+
|
|
7
|
+
It uses slightly altered
|
|
8
|
+
distributions of the VUA_pylib and KafNafParserPy but it keeps the external
|
|
9
|
+
libraries contained, so that shouldn't influence anything on your system.
|
|
10
|
+
|
|
11
|
+
This does mean however, that changes to the VUA_pylib and KafNafParserPy are not
|
|
12
|
+
automatically reflected in the kaf-naf-parser.
|
|
13
|
+
|
|
14
|
+
It ships with 3 command line tools and a webservice:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
kaf-to-naf
|
|
18
|
+
naf-to-kaf
|
|
19
|
+
kaf-naf-parser
|
|
20
|
+
kaf-naf-parser-server
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
It works like this:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
cat some_kaf.kaf | kaf-to-naf > output.naf
|
|
27
|
+
cat some_naf.naf | naf-to-kaf > output.kaf
|
|
28
|
+
|
|
29
|
+
cat some_kaf.kaf | kaf-naf-parser --tonaf > output.naf
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The webservice can be started like this:
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
kaf-naf-parser-server
|
|
36
|
+
```
|
|
37
|
+
And can then be accessed at: http://localhost:9292
|
|
38
|
+
|
|
39
|
+
Easiest way to install the gem:
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
git clone git@github.com:opener-project/kaf-naf-parser.git
|
|
43
|
+
cd kaf-naf-parser
|
|
44
|
+
rake install
|
|
45
|
+
```
|
|
46
|
+
|
data/bin/kaf-naf-parser
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# Launcher script: starts a Rack server that runs the application described
# by the config.ru file shipped with this package.

require 'rack'

# Without calling `Rack::Server#options` manually the CLI arguments will never
# be passed, thus the application can't be specified as a constructor argument.
server = Rack::Server.new
# Point Rack at the bundled config.ru, resolved relative to this script's own
# path so that the current working directory does not matter.
server.options[:config] = File.expand_path('../../config.ru', __FILE__)

server.start
|
data/bin/kaf-to-naf
ADDED
data/bin/naf-to-kaf
ADDED
data/config.ru
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
|
|
3
|
+
# This updates the load path to ensure that the local site-packages directory
|
|
4
|
+
# can be used to load packages (e.g. a locally installed copy of lxml).
|
|
5
|
+
|
|
6
|
+
import sys, getopt, os
|
|
7
|
+
|
|
8
|
+
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'site-packages/pre_build'))
|
|
9
|
+
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'site-packages/pre_install'))
|
|
10
|
+
|
|
11
|
+
from KafNafParser import KafNafParser
|
|
12
|
+
|
|
13
|
+
def main(argv):
    """Convert the KAF/NAF document read from stdin and dump it to stdout.

    argv -- command line arguments without the program name. Recognised
            options:
              -h              print the usage line and exit
              -k, --tokaf     convert the document to KAF
              -n, --tonaf     convert the document to NAF
            When neither conversion option is given the document is parsed
            and dumped without conversion.

    Exits with status 2 when the options cannot be parsed.
    """
    # Single source for the usage text so the help output and the error
    # message cannot drift apart (the help used to name 'test.py').
    usage = 'kaf-naf-parser.py --tokaf --tonaf'

    conversion = ""
    try:
        opts, _ = getopt.getopt(argv, "hkn", ["tokaf", "tonaf"])
    except getopt.GetoptError:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('could not parse options. Correct usage: \n\n ' + usage)
        sys.exit(2)

    for opt, _ in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-k", "--tokaf"):
            conversion = "to-kaf"
        elif opt in ("-n", "--tonaf"):
            conversion = "to-naf"

    if conversion == "":
        # No conversion requested: the document is only parsed and dumped.
        conversion = "kaf-naf"

    obj = KafNafParser(sys.stdin)

    if conversion == "to-kaf":
        obj.to_kaf()
    if conversion == "to-naf":
        obj.to_naf()

    obj.dump()
|
|
40
|
+
|
|
41
|
+
if __name__ == "__main__":
|
|
42
|
+
main(sys.argv[1:])
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 1.0
|
|
2
|
+
Name: KafNafParser
|
|
3
|
+
Version: 1.2
|
|
4
|
+
Summary: Parser between KAF and NAF
|
|
5
|
+
Home-page: https://github.com/cltl/KafNafParserPy
|
|
6
|
+
Author: Ruben Izquierdo
|
|
7
|
+
Author-email: r.izquierdobevia@vu.nl
|
|
8
|
+
License: UNKNOWN
|
|
9
|
+
Description: UNKNOWN
|
|
10
|
+
Platform: UNKNOWN
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
KafNafParser/KafNafParserMod.py
|
|
2
|
+
KafNafParser/__init__.py
|
|
3
|
+
KafNafParser/constituency_data.py
|
|
4
|
+
KafNafParser/coreference_data.py
|
|
5
|
+
KafNafParser/dependency_data.py
|
|
6
|
+
KafNafParser/entity_data.py
|
|
7
|
+
KafNafParser/external_references_data.py
|
|
8
|
+
KafNafParser/features_data.py
|
|
9
|
+
KafNafParser/header_data.py
|
|
10
|
+
KafNafParser/opinion_data.py
|
|
11
|
+
KafNafParser/references_data.py
|
|
12
|
+
KafNafParser/span_data.py
|
|
13
|
+
KafNafParser/term_data.py
|
|
14
|
+
KafNafParser/term_sentiment_data.py
|
|
15
|
+
KafNafParser/text_data.py
|
|
16
|
+
KafNafParser.egg-info/PKG-INFO
|
|
17
|
+
KafNafParser.egg-info/SOURCES.txt
|
|
18
|
+
KafNafParser.egg-info/dependency_links.txt
|
|
19
|
+
KafNafParser.egg-info/top_level.txt
|
|
20
|
+
KafNafParser/feature_extractor/__init__.py
|
|
21
|
+
KafNafParser/feature_extractor/constituency.py
|
|
22
|
+
KafNafParser/feature_extractor/dependency.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
../KafNafParser/__init__.py
|
|
2
|
+
../KafNafParser/header_data.py
|
|
3
|
+
../KafNafParser/text_data.py
|
|
4
|
+
../KafNafParser/term_data.py
|
|
5
|
+
../KafNafParser/entity_data.py
|
|
6
|
+
../KafNafParser/features_data.py
|
|
7
|
+
../KafNafParser/opinion_data.py
|
|
8
|
+
../KafNafParser/constituency_data.py
|
|
9
|
+
../KafNafParser/dependency_data.py
|
|
10
|
+
../KafNafParser/coreference_data.py
|
|
11
|
+
../KafNafParser/references_data.py
|
|
12
|
+
../KafNafParser/external_references_data.py
|
|
13
|
+
../KafNafParser/span_data.py
|
|
14
|
+
../KafNafParser/KafNafParserMod.py
|
|
15
|
+
../KafNafParser/term_sentiment_data.py
|
|
16
|
+
../KafNafParser/feature_extractor/dependency.py
|
|
17
|
+
../KafNafParser/feature_extractor/constituency.py
|
|
18
|
+
../KafNafParser/feature_extractor/__init__.py
|
|
19
|
+
../KafNafParser/__init__.pyc
|
|
20
|
+
../KafNafParser/header_data.pyc
|
|
21
|
+
../KafNafParser/text_data.pyc
|
|
22
|
+
../KafNafParser/term_data.pyc
|
|
23
|
+
../KafNafParser/entity_data.pyc
|
|
24
|
+
../KafNafParser/features_data.pyc
|
|
25
|
+
../KafNafParser/opinion_data.pyc
|
|
26
|
+
../KafNafParser/constituency_data.pyc
|
|
27
|
+
../KafNafParser/dependency_data.pyc
|
|
28
|
+
../KafNafParser/coreference_data.pyc
|
|
29
|
+
../KafNafParser/references_data.pyc
|
|
30
|
+
../KafNafParser/external_references_data.pyc
|
|
31
|
+
../KafNafParser/span_data.pyc
|
|
32
|
+
../KafNafParser/KafNafParserMod.pyc
|
|
33
|
+
../KafNafParser/term_sentiment_data.pyc
|
|
34
|
+
../KafNafParser/feature_extractor/dependency.pyc
|
|
35
|
+
../KafNafParser/feature_extractor/constituency.pyc
|
|
36
|
+
../KafNafParser/feature_extractor/__init__.pyc
|
|
37
|
+
../../../kaf_example.xml
|
|
38
|
+
../../../naf.dtd
|
|
39
|
+
../../../naf_example.xml
|
|
40
|
+
../../../test.py
|
|
41
|
+
../../../README.md
|
|
42
|
+
../../../LICENSE
|
|
43
|
+
./
|
|
44
|
+
SOURCES.txt
|
|
45
|
+
dependency_links.txt
|
|
46
|
+
top_level.txt
|
|
47
|
+
PKG-INFO
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
KafNafParser
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
## LIST OF CHANGES
|
|
2
|
+
# Ruben 8-nov-2013
|
|
3
|
+
# + included layers for entities, properties, opinions
|
|
4
|
+
# + renamed all classes to Cnameoftheclass
|
|
5
|
+
# Ruben 15-nov-2013
|
|
6
|
+
# + included constituency layer
|
|
7
|
+
#
|
|
8
|
+
# Ruben 19-nov-2013
|
|
9
|
+
# + included dependency layer
|
|
10
|
+
# Ruben 17-dec-2013
|
|
11
|
+
#   + modified all to read/write NAF and KAF
|
|
12
|
+
#
|
|
13
|
+
# Ruben 21-Feb-2014
|
|
14
|
+
# + Included coreference layer
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__last_modified = '17dec2013'
|
|
18
|
+
|
|
19
|
+
from lxml import etree
|
|
20
|
+
from header_data import *
|
|
21
|
+
from text_data import *
|
|
22
|
+
from term_data import *
|
|
23
|
+
from entity_data import *
|
|
24
|
+
from features_data import *
|
|
25
|
+
from opinion_data import *
|
|
26
|
+
from constituency_data import *
|
|
27
|
+
from dependency_data import *
|
|
28
|
+
from feature_extractor import Cdependency_extractor, Cconstituency_extractor
|
|
29
|
+
from coreference_data import *
|
|
30
|
+
from references_data import Creferences
|
|
31
|
+
|
|
32
|
+
import sys
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class KafNafParser:
    """Wrapper around a parsed KAF or NAF XML document.

    The document is parsed once in the constructor. Each known child element
    of the root is wrapped in its layer object and stored in an attribute
    (``header``, ``text_layer``, ``term_layer``, ...); the attribute stays
    ``None`` when the element is not present in the document.
    """

    def __init__(self,filename):
        """Parse ``filename`` and wrap every layer found under the root.

        ``filename`` is handed unchanged to ``etree.parse``.
        """
        self.tree = None
        self.filename = filename
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
        self.root = self.tree.getroot()
        self.type = self.root.tag # KAF NAF

        self.header = None
        self.text_layer = None
        self.term_layer = None
        self.entity_layer = None
        self.features_layer = None
        self.opinion_layer = None
        self.constituency_layer = None
        self.dependency_layer = None
        self.coreference_layer = None

        ## Specific feature extractor for complicated layers
        ## (created lazily by the get_*_extractor methods)
        self.my_dependency_extractor = None
        self.my_constituency_extractor = None

        ## Cache filled by get_dict_tokens_for_termid
        self.dict_tokens_for_tid = None

        self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
        self.version = self.root.get('version')

        # Start from None so a root tag that is neither NAF nor KAF leaves
        # the header unset instead of raising on an unbound local below.
        node_header = None
        if self.type == 'NAF':
            node_header = self.root.find('nafHeader')
        elif self.type == 'KAF':
            node_header = self.root.find('kafHeader')

        if node_header is not None:
            self.header = CHeader(node_header,self.type)

        # Text layer adapted to naf/kaf
        node_text = self.root.find('text')
        if node_text is not None:
            self.text_layer = Ctext(node=node_text,type=self.type)

        node_term = self.root.find('terms')
        if node_term is not None:
            self.term_layer = Cterms(node=node_term,type=self.type)

        node_entity = self.root.find('entities')
        if node_entity is not None:
            self.entity_layer = Centities(node_entity,type=self.type)

        node_features = self.root.find('features')
        if node_features is not None:
            self.features_layer = Cfeatures(node_features,type=self.type)

        node_opinions = self.root.find('opinions')
        if node_opinions is not None:
            self.opinion_layer = Copinions(node_opinions,type=self.type)

        # Definition KAF/NAF is the same
        node_constituency = self.root.find('constituency')
        if node_constituency is not None:
            self.constituency_layer = Cconstituency(node_constituency)

        # Definition KAF/NAF is the same
        node_dependency = self.root.find('deps')
        if node_dependency is not None:
            self.dependency_layer = Cdependencies(node_dependency)

        node_coreferences = self.root.find('coreferences')
        if node_coreferences is not None:
            self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    def get_type(self):
        """Return the current document type (the root tag, e.g. 'KAF' or 'NAF')."""
        return self.type

    def get_filename(self):
        """Return the value that was passed to the constructor."""
        return self.filename

    def _convert_layers(self, method_name):
        """Call ``method_name`` ('to_kaf' or 'to_naf') on every present layer.

        Layers are visited in a fixed order: header, text, terms, entities,
        features, opinions, constituency, dependencies, coreferences.
        """
        layers = (
            self.header,
            self.text_layer,
            self.term_layer,
            self.entity_layer,
            ## There is no feature layer defined in NAF, but we assume that
            ## if it is defined it will follow the same rules
            self.features_layer,
            self.opinion_layer,
            ## The constituency layer is exactly the same in KAF/NAF
            self.constituency_layer,
            ## The dependency layer is not defined in KAF, so we assume both
            ## will be similar
            self.dependency_layer,
            self.coreference_layer,
        )
        for layer in layers:
            if layer is not None:
                getattr(layer, method_name)()

    def to_kaf(self):
        """Convert the in-memory document to KAF (root tag and every layer)."""
        #Convert the root
        if self.type == 'NAF':
            self.root.tag = 'KAF'
            self.type = 'KAF'

        self._convert_layers('to_kaf')

    def to_naf(self):
        """Convert the in-memory document to NAF (root tag and every layer)."""
        #Convert the root
        if self.type == 'KAF':
            self.root.tag = self.type = 'NAF'

        self._convert_layers('to_naf')

    def print_constituency(self):
        """Print the constituency layer object (prints None when absent)."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.constituency_layer)

    def get_trees(self):
        """Yield the trees of the constituency layer; nothing if it is absent."""
        if self.constituency_layer is not None:
            for tree in self.constituency_layer.get_trees():
                yield tree

    def get_dependencies(self):
        """Yield the dependencies of the dependency layer; nothing if absent."""
        if self.dependency_layer is not None:
            for dep in self.dependency_layer.get_dependencies():
                yield dep

    def get_language(self):
        """Return the value of the root's xml:lang attribute (or None)."""
        return self.lang

    def get_tokens(self):
        """Yield every item of the text layer; nothing if the layer is absent."""
        # Guarded like the other getters: iterating a missing (None) layer
        # used to raise TypeError.
        if self.text_layer is not None:
            for token in self.text_layer:
                yield token

    def get_terms(self):
        """Yield every item of the term layer; nothing if the layer is absent."""
        # Guarded like the other getters: iterating a missing (None) layer
        # used to raise TypeError.
        if self.term_layer is not None:
            for term in self.term_layer:
                yield term

    def get_token(self,token_id):
        """Return the text layer's entry for ``token_id``, or None without a text layer."""
        if self.text_layer is not None:
            return self.text_layer.get_wf(token_id)
        else:
            return None

    def get_term(self,term_id):
        """Return the term layer's entry for ``term_id``, or None without a term layer."""
        if self.term_layer is not None:
            return self.term_layer.get_term(term_id)
        else:
            return None

    def get_properties(self):
        """Yield the properties of the features layer; nothing if it is absent."""
        if self.features_layer is not None:
            for prop in self.features_layer.get_properties():
                yield prop

    def get_entities(self):
        """Yield every item of the entity layer; nothing if the layer is absent."""
        if self.entity_layer is not None:
            for entity in self.entity_layer:
                yield entity

    def get_opinions(self):
        """Yield the opinions of the opinion layer; nothing if it is absent."""
        if self.opinion_layer is not None:
            for opinion in self.opinion_layer.get_opinions():
                yield opinion

    def dump(self,filename=sys.stdout):
        """Write the document as pretty-printed UTF-8 XML with an XML declaration.

        ``filename`` is passed straight to the tree's ``write`` method; the
        default is the ``sys.stdout`` object that existed when the class was
        defined.
        """
        self.tree.write(filename,encoding='UTF-8',pretty_print=True,xml_declaration=True)

    def remove_opinion_layer(self):
        """Detach the opinion layer's node from the root and forget the layer."""
        if self.opinion_layer is not None:
            this_node = self.opinion_layer.get_node()
            self.root.remove(this_node)
            self.opinion_layer = None

    def remove_term_layer(self):
        """Detach the term layer's node from the root and drop its header entry."""
        if self.term_layer is not None:
            this_node = self.term_layer.get_node()
            self.root.remove(this_node)
            self.term_layer = None

        # The header clean-up runs whether or not a term layer was present.
        if self.header is not None:
            self.header.remove_lp('terms')

    def get_constituency_extractor(self):
        """Return the (lazily created, cached) constituency extractor.

        Returns None when the document has no constituency layer.
        """
        if self.constituency_layer is not None: ##Otherwise there are no constituents
            if self.my_constituency_extractor is None:
                self.my_constituency_extractor = Cconstituency_extractor(self)
            return self.my_constituency_extractor
        else:
            return None

    def get_dependency_extractor(self):
        """Return the (lazily created, cached) dependency extractor.

        Returns None when the document has no dependency layer.
        """
        if self.dependency_layer is not None: #otherwise there are no dependencies
            if self.my_dependency_extractor is None:
                self.my_dependency_extractor = Cdependency_extractor(self)
            return self.my_dependency_extractor
        else:
            return None

    ## ADDING METHODS
    def add_wf(self,wf_obj):
        """Add ``wf_obj`` to the text layer, creating and attaching the layer if needed."""
        if self.text_layer is None:
            self.text_layer = Ctext(type=self.type)
            self.root.append(self.text_layer.get_node())
        self.text_layer.add_wf(wf_obj)

    def add_opinion(self,opinion_obj):
        """Add ``opinion_obj`` to the opinion layer, creating and attaching the layer if needed."""
        if self.opinion_layer is None:
            self.opinion_layer = Copinions()
            self.root.append(self.opinion_layer.get_node())
        self.opinion_layer.add_opinion(opinion_obj)

    def add_linguistic_processor(self, layer ,my_lp):
        """Forward ``layer`` and ``my_lp`` to the header's add_linguistic_processor.

        Raises AttributeError when the document has no header (``self.header``
        is None).
        """
        self.header.add_linguistic_processor(layer,my_lp)

    def add_dependency(self,my_dep):
        """Add ``my_dep`` to the dependency layer, creating and attaching the layer if needed."""
        if self.dependency_layer is None:
            self.dependency_layer = Cdependencies()
            self.root.append(self.dependency_layer.get_node())
        self.dependency_layer.add_dependency(my_dep)

    ## EXTRA FUNCTIONS
    ## Gets the token identifiers in the span of a term id
    def get_dict_tokens_for_termid(self, term_id):
        """Return the span ids recorded for ``term_id`` ([] when unknown).

        The term-id -> span-ids mapping is built from all terms on the first
        call and reused afterwards.
        """
        if self.dict_tokens_for_tid is None:
            self.dict_tokens_for_tid = {}
            for term in self.get_terms():
                self.dict_tokens_for_tid[term.get_id()] = term.get_span().get_span_ids()

        return self.dict_tokens_for_tid.get(term_id,[])
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
|