opener-pos-tagger-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +110 -0
- data/bin/pos-tagger-base +21 -0
- data/core/mapping.postag.stss.to.opener.csv +52 -0
- data/core/mapping.postag.wotan.to.opener.csv +13 -0
- data/core/opennlp/bin/opennlp +35 -0
- data/core/opennlp/bin/opennlp.bat +35 -0
- data/core/opennlp/lib/jwnl-1.3.3.jar +0 -0
- data/core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar +0 -0
- data/core/opennlp/models/de-pos-maxent.bin +0 -0
- data/core/opennlp/models/de-pos-perceptron.bin +0 -0
- data/core/opennlp/models/nl-pos-maxent.bin +0 -0
- data/core/opennlp/models/nl-pos-perceptron.bin +0 -0
- data/core/pos-tagger_open-nlp.py +160 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/token_matcher.py +80 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/pos_taggers/base.rb +90 -0
- data/lib/opener/pos_taggers/base/version.rb +7 -0
- data/opener-pos-tagger-base.gemspec +29 -0
- data/pre_build_requirements.txt +1 -0
- metadata +132 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e1d01b280c3f2369e20c811fa11a42150b41cc16
|
4
|
+
data.tar.gz: 7639fb3ce4fb64641047659339b500157940087c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 31dd9808cc4b3ce95de10c8e95c456af963a2b93cc1bbef60e9917716b9de830ce3b749c28b63017ab7ce90393172cf499563400fbb62873a39eb0be2d0e2f1a
|
7
|
+
data.tar.gz: 7b9ab3549277fc1c93b09b60eae00f9b56894dee3565550f9eee7d82209316d770083df32085885282da18a3c12475b1e4c897f9c8fd928f2c144841bea88e3d
|
data/README.md
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
[![Build Status](https://drone.io/github.com/opener-project/pos-tagger-base/status.png)](https://drone.io/github.com/opener-project/pos-tagger-base/latest)
|
2
|
+
|
3
|
+
# Base POS Tagger
|
4
|
+
|
5
|
+
This repository contains the source code (both Ruby and Python) for the base
|
6
|
+
POS tagger. Currently this tagger supports the following languages:
|
7
|
+
|
8
|
+
* Dutch
|
9
|
+
* German
|
10
|
+
|
11
|
+
## Requirements
|
12
|
+
|
13
|
+
* Python 2.7.x (the bundled Python code targets Python 2)
|
14
|
+
* Ruby 1.9.2 or newer
|
15
|
+
* pip
|
16
|
+
* libxml2
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
Using Bundler:
|
21
|
+
|
22
|
+
gem 'opener-pos-tagger-base',
|
23
|
+
:git => 'git@github.com:opener-project/pos-tagger-base.git',
|
24
|
+
:branch => 'master'
|
25
|
+
|
26
|
+
Using `specific_install`:
|
27
|
+
|
28
|
+
gem install specific_install
|
29
|
+
gem specific_install opener-pos-tagger-base \
|
30
|
+
-l https://github.com/opener-project/pos-tagger-base.git
|
31
|
+
|
32
|
+
Using regular RubyGems (once the Gem is available):
|
33
|
+
|
34
|
+
gem install opener-pos-tagger-base
|
35
|
+
|
36
|
+
## Usage
|
37
|
+
|
38
|
+
Tagging a KAF file:
|
39
|
+
|
40
|
+
cat some_input_file.kaf | pos-tagger-base
|
41
|
+
|
42
|
+
## Contributing
|
43
|
+
|
44
|
+
First make sure all the required dependencies are installed:
|
45
|
+
|
46
|
+
bundle install
|
47
|
+
|
48
|
+
Then download the required Python code:
|
49
|
+
|
50
|
+
bundle exec rake compile
|
51
|
+
|
52
|
+
Once this is done continue reading the sections below to get a better
|
53
|
+
understanding of the repository structure.
|
54
|
+
|
55
|
+
## Structure
|
56
|
+
|
57
|
+
This repository comes in two parts: a collection of Python source files and
|
58
|
+
Ruby source code. The Python code can be found in `core/`, the Ruby code can be
|
59
|
+
found in the other directories (e.g. `lib/`).
|
60
|
+
|
61
|
+
Required Python packages are installed locally into `core/site-packages/X`
|
62
|
+
where X is one of the following two:
|
63
|
+
|
64
|
+
* `pre_build`: contains packages that are installed before building the Gem,
|
65
|
+
these packages are shipped with the Gem
|
66
|
+
* `pre_install`: contains packages that are installed into this directory upon
|
67
|
+
installing the Gem. This directory should exclusively be used for compiled
|
68
|
+
Python packages such as lxml.
|
69
|
+
|
70
|
+
There are also two requirements files for pip:
|
71
|
+
|
72
|
+
* `pre_build_requirements.txt`: installs the requirements for the `pre_build`
|
73
|
+
directory.
|
74
|
+
* `pre_install_requirements.txt`: installs the requirements for the
|
75
|
+
`pre_install` directory.
|
76
|
+
|
77
|
+
To easily install all the required dependencies (required for running the tests
|
78
|
+
for example) run the following:
|
79
|
+
|
80
|
+
bundle exec rake compile
|
81
|
+
|
82
|
+
This will take care of verifying the requirements and downloading and
|
83
|
+
installing the Python packages.
|
84
|
+
|
85
|
+
## Testing
|
86
|
+
|
87
|
+
To run the tests (which are powered by Cucumber), simply run the following:
|
88
|
+
|
89
|
+
bundle exec rake
|
90
|
+
|
91
|
+
This will take care of verifying the requirements, installing the Python code
|
92
|
+
and running the tests.
|
93
|
+
|
94
|
+
For more information on the available Rake tasks run the following:
|
95
|
+
|
96
|
+
bundle exec rake -T
|
97
|
+
|
98
|
+
## POS Details
|
99
|
+
|
100
|
+
### POS-tags models
|
101
|
+
|
102
|
+
* [Dutch-maxent](http://opennlp.sourceforge.net/models-1.5/nl-pos-maxent.bin)
|
103
|
+
* [Dutch-perceptron](http://opennlp.sourceforge.net/models-1.5/nl-pos-perceptron.bin)
|
104
|
+
* [German-maxent](http://opennlp.sourceforge.net/models-1.5/de-pos-maxent.bin)
|
105
|
+
* [German-perceptron](http://opennlp.sourceforge.net/models-1.5/de-pos-perceptron.bin)
|
106
|
+
|
107
|
+
### POS-tags sets
|
108
|
+
|
109
|
+
* Dutch: trained on conllx alpino data, wotan tagset
|
110
|
+
* German: trained on TIGER corpus, STTS tagset
|
data/bin/pos-tagger-base
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby

require_relative '../lib/opener/pos_taggers/base'

# Read STDIN only when data is being piped in. STDIN.tty? is true for an
# interactive terminal, in which case no input is handed to the tagger.
input = STDIN.tty? ? nil : STDIN.read

tagger = Opener::POSTaggers::Base.new(:args => ARGV)
stdout, stderr, process = tagger.run(input)

# Failure path first: abort writes the captured stderr and exits non-zero.
abort stderr unless process.success?

puts stdout

# Forward anything the tagger wrote to stderr even though it succeeded.
STDERR.puts(stderr) unless stderr.empty?
|
@@ -0,0 +1,52 @@
|
|
1
|
+
ADJA G ("Attributives Adjektiv"),
|
2
|
+
ADJD G ("Adverbiales oder prädikatives Adjektiv"),
|
3
|
+
ADV A ("Adverb"),
|
4
|
+
APPR P ("Präposition; Zirkumposition links"),
|
5
|
+
APPRART P ("Präposition mit Artikel"),
|
6
|
+
APPO P ("Postposition"),
|
7
|
+
APZR P ("Zirkumposition rechts"),
|
8
|
+
ART D ("Bestimmter oder unbestimmter Artikel"),
|
9
|
+
CARD O ("Kardinalzahl"),
|
10
|
+
FM O ("Fremdsprachliches Material"),
|
11
|
+
ITJ O ("Interjektion"),
|
12
|
+
KOUI C ("unterordnende Konjunktion mit zu und Infinitiv"),
|
13
|
+
KOUS C ("unterordnende Konjunktion mit Satz"),
|
14
|
+
KON C ("nebenordnende Konjunktion"),
|
15
|
+
KOKOM C ("Vergleichskonjunktion"),
|
16
|
+
NN N ("normales Nomen"),
|
17
|
+
NE R ("Eigennamen"),
|
18
|
+
PDS Q ("substituierendes Demonstrativpronomen"),
|
19
|
+
PDAT Q ("attribuierendes Demonstrativpronomen"),
|
20
|
+
PIS Q ("substituierendes Indefinitpronomen"),
|
21
|
+
PIAT Q ("attribuierendes Indefinitpronomen ohne Determiner"),
|
22
|
+
PIDAT Q ("attribuierendes Indefinitpronomen mit Determiner"),
|
23
|
+
PPER Q ("irreflexives Personalpronomen"),
|
24
|
+
PPOSS Q ("substituierendes Possessivpronomen"),
|
25
|
+
PPOSAT Q ("attribuierendes Possessivpronomen"),
|
26
|
+
PRELS Q ("substituierendes Relativpronomen"),
|
27
|
+
PRELAT Q ("attribuierendes Relativpronomen"),
|
28
|
+
PRF Q ("reflexives Personalpronomen"),
|
29
|
+
PWS Q ("substituierendes Interrogativpronomen"),
|
30
|
+
PWAT Q ("attribuierendes Interrogativpronomen"),
|
31
|
+
PWAV Q ("adverbiales Interrogativ- oder Relativpronomen"),
|
32
|
+
PAV Q ("Pronominaladverb"),
|
33
|
+
PTKZU O ("zu vor Infinitiv"),
|
34
|
+
PTKNEG O ("Negationspartikel"),
|
35
|
+
PTKVZ V ("abgetrennter Verbzusatz"),
|
36
|
+
PTKANT O ("Antwortpartikel"),
|
37
|
+
PTKA O ("Partikel bei Adjektiv oder Adverb"),
|
38
|
+
TRUNC N ("Kompositions-Erstglied"),
|
39
|
+
VVFIN V ("finites Verb, voll"),
|
40
|
+
VVIMP V ("Imperativ, voll"),
|
41
|
+
VVINF V ("Infinitiv"),
|
42
|
+
VVIZU V ("Infinitiv mit zu"),
|
43
|
+
VVPP V ("Partizip Perfekt"),
|
44
|
+
VAFIN V ("finites Verb, aux"),
|
45
|
+
VAIMP V ("Imperativ, aux"),
|
46
|
+
VAINF V ("Infinitiv, aux"),
|
47
|
+
VAPP V ("Partizip Perfekt"),
|
48
|
+
VMFIN V ("finites Verb, modal"),
|
49
|
+
VMINF V ("Infinitiv, modal"),
|
50
|
+
VMPP V ("Partizip Perfekt, modal"),
|
51
|
+
XY O ("Nichtwort, Sonderzeichen"),
|
52
|
+
UNDEFINED O ("Nicht definiert, zb. Satzzeichen");
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
19
|
+
|
20
|
+
# Note: Do not output anything in this script file, any output
|
21
|
+
# may be inadvertently placed in any output files if
|
22
|
+
# output redirection is used.
|
23
|
+
|
24
|
+
# Locate the java executable: honour a caller-supplied JAVACMD first, then
# JAVA_HOME, and finally fall back to whatever `java` is found on the PATH.
if [ -z "$JAVACMD" ] ; then
  if [ -n "$JAVA_HOME" ] ; then
    JAVACMD="$JAVA_HOME/bin/java"
  else
    JAVACMD="`which java`"
  fi
fi

# Might fail if $0 is a link
OPENNLP_HOME=`dirname "$0"`/..

# Every expansion is quoted so that install paths and arguments containing
# spaces (e.g. a model file path) reach java as single words. Only the jar
# version glob is left outside the quotes so the shell can still expand it.
"$JAVACMD" -Xmx1024m -jar "$OPENNLP_HOME"/lib/opennlp-tools-*.jar "$@"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
@ECHO off
|
2
|
+
|
3
|
+
REM # Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
REM # or more contributor license agreements. See the NOTICE file
|
5
|
+
REM # distributed with this work for additional information
|
6
|
+
REM # regarding copyright ownership. The ASF licenses this file
|
7
|
+
REM # to you under the Apache License, Version 2.0 (the
|
8
|
+
REM # "License"); you may not use this file except in compliance
|
9
|
+
REM # with the License. You may obtain a copy of the License at
|
10
|
+
REM #
|
11
|
+
REM # http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
REM #
|
13
|
+
REM # Unless required by applicable law or agreed to in writing,
|
14
|
+
REM # software distributed under the License is distributed on an
|
15
|
+
REM # # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
REM # KIND, either express or implied. See the License for the
|
17
|
+
REM # specific language governing permissions and limitations
|
18
|
+
REM # under the License.
|
19
|
+
|
20
|
+
REM # Note: Do not output anything in this script file, any output
|
21
|
+
REM # may be inadvertently placed in any output files if
|
22
|
+
REM # output redirection is used.
|
23
|
+
|
24
|
+
REM # Locate the java executable: use JAVA_CMD when the caller set it,
REM # otherwise JAVA_HOME\bin\java, otherwise plain "java" from the PATH.
REM # The quoted SET "NAME=value" form keeps a ")" inside JAVA_HOME
REM # (e.g. "C:\Program Files (x86)\Java\...") from closing the IF block early.
IF "%JAVA_CMD%" == "" (
  IF "%JAVA_HOME%" == "" (
    SET "JAVA_CMD=java"
  ) ELSE (
    SET "JAVA_CMD=%JAVA_HOME%\bin\java"
  )
)

REM # Should work with Windows XP and greater. If not, specify the path to where it is installed.
IF "%OPENNLP_HOME%" == "" SET OPENNLP_HOME=%~sp0..

REM # JAVA_CMD is quoted because a path derived from JAVA_HOME may contain spaces.
"%JAVA_CMD%" -Xmx4096m -jar %OPENNLP_HOME%\lib\opennlp-tools-*.jar %*
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,160 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
#-*- coding: utf-8 *-*
|
3
|
+
# Ruben Izquierdo
|
4
|
+
# Vrije University of Amsterdam
|
5
|
+
|
6
|
+
import os
|
7
|
+
import sys
|
8
|
+
import operator
|
9
|
+
import time
|
10
|
+
import getopt
|
11
|
+
import string
|
12
|
+
import subprocess
|
13
|
+
|
14
|
+
# Set the character-type locale in this process' environment.
os.environ["LC_CTYPE"] = 'en_US.UTF-8'

# Directory layout, resolved relative to the real location of this script.
this_folder = os.path.dirname(os.path.realpath(__file__))
opennlp_folder = os.path.join(this_folder, 'opennlp')
model_folder = os.path.join(opennlp_folder, 'models')

# Extend the load path so that packages from the local site-packages
# directories can be imported (e.g. a locally installed copy of lxml).
sys.path.extend(
    os.path.join(this_folder, site_dir)
    for site_dir in ('site-packages/pre_build', 'site-packages/pre_install')
)

# Config for German
pos_model_de = 'de-pos-maxent.bin'
mapping_pos_filename_de = 'mapping.postag.stss.to.opener.csv'

# Config for Dutch
pos_model_nl = 'nl-pos-maxent.bin'
mapping_pos_filename_nl = 'mapping.postag.wotan.to.opener.csv'

# Run-time state: the mapping file name is chosen once the input language is
# known, and the tag mapping itself is loaded lazily by map_pos_tag().
mapping_pos_filename = ""
mapping_postag_to_kaf = None
__version__ = '2-May-2013'
|
36
|
+
|
37
|
+
from lxml.etree import ElementTree as ET, Element as EL, PI
|
38
|
+
from VUKafParserPy.KafParserMod import KafParser
|
39
|
+
from token_matcher import token_matcher
|
40
|
+
|
41
|
+
def map_pos_tag(pos):
    """Translate a tagger-specific POS tag into the KAF/OpeNER POS tag.

    The tab-separated mapping file named by the module-level
    ``mapping_pos_filename`` (resolved against ``this_folder``) is read on the
    first call and cached in the module-level ``mapping_postag_to_kaf``.
    Only lines with exactly three tab-separated fields are used: the first
    field is the source tag and the second the KAF tag; the third field is
    ignored.

    :param pos: POS tag as produced by the tagger.
    :return: the mapped KAF tag, or ``'O'`` when ``pos`` is not in the mapping.
    :raises IOError: when the mapping file cannot be opened or read.
    """
    global mapping_postag_to_kaf
    if mapping_postag_to_kaf is None:
        # Build into a local dict and publish it only after the whole file was
        # read, so a failure half-way cannot leave an empty or partial cache
        # behind that later calls would silently use.
        loaded = {}
        file_mapping = os.path.join(this_folder, mapping_pos_filename)
        # ``with`` closes the file even when reading raises.
        with open(file_mapping, 'r') as fic:
            for line in fic:
                fields = line.strip().split('\t')
                if len(fields) == 3:
                    loaded[fields[0]] = fields[1]
        mapping_postag_to_kaf = loaded
    return mapping_postag_to_kaf.get(pos, 'O')
|
56
|
+
|
57
|
+
|
58
|
+
if __name__=='__main__':
    # Script entry point: read a KAF document from stdin, POS-tag every
    # sentence with the bundled opennlp command line tool, add one <term>
    # element per tagged token to the 'terms' layer and write the resulting
    # KAF document to stdout.

    if sys.stdin.isatty():
        sys.stderr.write('Input stream required.\n')
        sys.stderr.write('Example usage: cat myUTF8file.kaf | ' + sys.argv[0] + '\n')
        sys.exit(-1)

    # --no-time switches off the time stamp flag passed to the
    # linguistic-processor header entry below.
    time_stamp = True
    try:
        opts, args = getopt.getopt(sys.argv[1:], "l:", ["no-time"])
        for opt, arg in opts:
            if opt == "--no-time":
                time_stamp = False
    except getopt.GetoptError:
        # NOTE(review): unrecognised options are silently ignored; presumably
        # deliberate so that extra wrapper arguments do not abort the run -
        # confirm.
        pass

    input_kaf = KafParser(sys.stdin)
    my_lang = input_kaf.getLanguage()

    # Pick the model and tag mapping for the document language.
    # mapping_pos_filename is the module-level name read by map_pos_tag().
    if my_lang == 'nl':
        pos_model = pos_model_nl
        mapping_pos_filename = mapping_pos_filename_nl
    elif my_lang == 'de':
        pos_model = pos_model_de
        mapping_pos_filename = mapping_pos_filename_de
    else:
        # Report on stderr: this is an error exit, and stdout is reserved for
        # the KAF output. %-formatting also copes with a missing language.
        sys.stderr.write('The language of the input KAF is "%s" and only can be Dutch (nl) or German (de)\n' % my_lang)
        sys.exit(-1)

    # Group the input tokens into sentences; each sentence is a list of
    # (word, word id) pairs in document order.
    sentences = []
    prev_sent = '-200'
    aux = []
    for word, sent_id, w_id in input_kaf.getTokens():
        if sent_id != prev_sent:
            if aux:
                sentences.append(aux)
            aux = []
        aux.append((word, w_id))
        prev_sent = sent_id
    if aux:
        sentences.append(aux)

    # The tagger command line is the same for every sentence.
    cmd = [os.path.join(opennlp_folder, 'bin/opennlp'),
           'POSTagger',
           os.path.join(model_folder, pos_model)]
    open_class_tags = ('N', 'R', 'G', 'V', 'A', 'O')

    # Term ids are numbered across the whole document so that every <term>
    # gets a unique tid; restarting the count for each sentence would
    # produce duplicate ids.
    next_term = 0

    for sentence in sentences:
        text = ' '.join(t for t, _ in sentence).encode('utf-8')
        try:
            proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            # communicate() feeds stdin, drains stdout and stderr together
            # and waits for the process to exit, so a full stderr pipe
            # cannot block the tagger.
            out, err = proc.communicate(text)
            text_with_pos = out.strip().decode('utf-8')  ## variable is unicode
        except Exception as e:
            sys.stderr.write(str(e) + '\n')
            sys.exit(-1)

        if proc.returncode != 0:
            # Fail loudly instead of building terms from the output of a
            # tagger run that did not succeed.
            sys.stderr.write('opennlp exited with status %d\n' % proc.returncode)
            sys.stderr.write(err)
            sys.exit(-1)

        # Each space-separated output token is split at its last '_': the
        # text before it is stored as the lemma, the text after it is the tag.
        data = {}
        new_tokens = []
        for token in text_with_pos.split(' '):
            position = token.rfind('_')
            lemma = token[:position]
            pos = token[position+1:]
            my_id = 't_' + str(next_term)
            next_term += 1
            data[my_id] = (lemma, pos)
            new_tokens.append((lemma, my_id))

        # token_matcher fills mapping_tokens; it is read below as
        # term id -> ids of the input tokens the term spans.
        mapping_tokens = {}
        token_matcher(sentence, new_tokens, mapping_tokens)

        for token_new, id_new in new_tokens:
            lemma, pos = data[id_new]
            opener_pos = map_pos_tag(pos)
            if opener_pos in open_class_tags:
                type_term = 'open'
            else:
                type_term = 'close'
            ele_term = EL('term', attrib={'tid': id_new,
                                          'type': type_term,
                                          'pos': opener_pos,
                                          'morphofeat': pos,
                                          'lemma': lemma})
            ele_span = EL('span')
            for ref_token in mapping_tokens[id_new]:
                ele_span.append(EL('target', attrib={'id': ref_token}))
            ele_term.append(ele_span)

            input_kaf.addElementToLayer('terms', ele_term)

    input_kaf.addLinguisticProcessor('Open nlp pos tagger', '1.0', 'term', time_stamp)
    input_kaf.saveToFile(sys.stdout)
    sys.exit(0)
|
158
|
+
|
159
|
+
|
160
|
+
|