opener-constituent-parser-nl 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -3
- data/core/alpino_parser.py +3 -1
- data/lib/opener/constituent_parsers/nl.rb +1 -1
- data/lib/opener/constituent_parsers/nl/version.rb +1 -1
- data/task/requirements.rake +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 819a202f5bb0bc6e8eb8b653056b3921eb4973c5
|
4
|
+
data.tar.gz: 7b07ac07c45bfcd877112e61b2aceff684eaeec4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 614fbe0ebca264c3348f836911c7ef29d9bff17a9a89951ca4adef52dd5e878e77d9ea66122ad047080b036588c1675e9876de9b2be167fcaec03295f30583ca
|
7
|
+
data.tar.gz: 514153a51bcb96f689ddbf7fb705c9f0570f759d068540ee1e19910dd29c99013e138b2da269cca7fc71d583490d98ed52c208e1ba2ee6962d14fa2fd38fd58a
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
[![Build Status](https://drone.io/github.com/opener-project/constituent-parser-nl/status.png)](https://drone.io/github.com/opener-project/constituent-parser-nl/latest)
|
2
|
-
|
3
1
|
Constituent-parser-nl
|
4
2
|
=======
|
5
3
|
|
@@ -38,4 +36,4 @@ Contact
|
|
38
36
|
------
|
39
37
|
* Ruben Izquierdo
|
40
38
|
* Vrije University of Amsterdam
|
41
|
-
* ruben.izquierdobevia@vu.nl
|
39
|
+
* ruben.izquierdobevia@vu.nl
|
data/core/alpino_parser.py
CHANGED
@@ -32,6 +32,8 @@ this_name = 'alpino kaf constituency parser'
|
|
32
32
|
this_layer = 'constituents'
|
33
33
|
|
34
34
|
#### SET THIS VARIABLE TO YOUR LOCAL FOLDER OF ALPINO
|
35
|
+
os.environ['SP_CTYPE']='utf8'
|
36
|
+
os.environ['SP_CSETLEN']='212'
|
35
37
|
ALPINO_HOME = os.environ['ALPINO_HOME']
|
36
38
|
|
37
39
|
logging.basicConfig(stream=sys.stderr,format='%(asctime)s - %(levelname)s - %(message)s',level=logging.DEBUG)
|
@@ -73,6 +75,7 @@ def xml_to_penn(filename):
|
|
73
75
|
## to iso-8859-1, but the real encoding is UTF-8. So we need to force to use this encoding
|
74
76
|
|
75
77
|
parser = etree.XMLParser(encoding='UTF-8')
|
78
|
+
#parser = etree.XMLParser(encoding='ISO-8859-1')
|
76
79
|
tree = etree.parse(filename,parser)
|
77
80
|
|
78
81
|
str = node_to_penn(tree.find('node'))
|
@@ -159,7 +162,6 @@ for sentence in sentences:
|
|
159
162
|
token = token.replace('[','\[')
|
160
163
|
token = token.replace(']','\]')
|
161
164
|
token = token.replace('|','\|')
|
162
|
-
#print>>sys.stderr,token.encode('utf-8'),
|
163
165
|
alpino_pro.stdin.write(token.encode('utf-8')+' ')
|
164
166
|
alpino_pro.stdin.write('\n')
|
165
167
|
#print>>sys.stderr
|
data/task/requirements.rake
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-constituent-parser-nl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|