opener-constituent-parser-nl 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -3
- data/core/alpino_parser.py +3 -1
- data/lib/opener/constituent_parsers/nl.rb +1 -1
- data/lib/opener/constituent_parsers/nl/version.rb +1 -1
- data/task/requirements.rake +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 819a202f5bb0bc6e8eb8b653056b3921eb4973c5
|
4
|
+
data.tar.gz: 7b07ac07c45bfcd877112e61b2aceff684eaeec4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 614fbe0ebca264c3348f836911c7ef29d9bff17a9a89951ca4adef52dd5e878e77d9ea66122ad047080b036588c1675e9876de9b2be167fcaec03295f30583ca
|
7
|
+
data.tar.gz: 514153a51bcb96f689ddbf7fb705c9f0570f759d068540ee1e19910dd29c99013e138b2da269cca7fc71d583490d98ed52c208e1ba2ee6962d14fa2fd38fd58a
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
[](https://drone.io/github.com/opener-project/constituent-parser-nl/latest)
|
2
|
-
|
3
1
|
Constituent-parser-nl
|
4
2
|
=======
|
5
3
|
|
@@ -38,4 +36,4 @@ Contact
|
|
38
36
|
------
|
39
37
|
* Ruben Izquierdo
|
40
38
|
* Vrije University of Amsterdam
|
41
|
-
* ruben.izquierdobevia@vu.nl
|
39
|
+
* ruben.izquierdobevia@vu.nl
|
data/core/alpino_parser.py
CHANGED
@@ -32,6 +32,8 @@ this_name = 'alpino kaf constituency parser'
|
|
32
32
|
this_layer = 'constituents'
|
33
33
|
|
34
34
|
#### SET THIS VARIABLE TO YOUR LOCAL FOLDER OF ALPINO
|
35
|
+
os.environ['SP_CTYPE']='utf8'
|
36
|
+
os.environ['SP_CSETLEN']='212'
|
35
37
|
ALPINO_HOME = os.environ['ALPINO_HOME']
|
36
38
|
|
37
39
|
logging.basicConfig(stream=sys.stderr,format='%(asctime)s - %(levelname)s - %(message)s',level=logging.DEBUG)
|
@@ -73,6 +75,7 @@ def xml_to_penn(filename):
|
|
73
75
|
## to iso-8859-1, but the real encoding is UTF-8. So we need to force to use this encoding
|
74
76
|
|
75
77
|
parser = etree.XMLParser(encoding='UTF-8')
|
78
|
+
#parser = etree.XMLParser(encoding='ISO-8859-1')
|
76
79
|
tree = etree.parse(filename,parser)
|
77
80
|
|
78
81
|
str = node_to_penn(tree.find('node'))
|
@@ -159,7 +162,6 @@ for sentence in sentences:
|
|
159
162
|
token = token.replace('[','\[')
|
160
163
|
token = token.replace(']','\]')
|
161
164
|
token = token.replace('|','\|')
|
162
|
-
#print>>sys.stderr,token.encode('utf-8'),
|
163
165
|
alpino_pro.stdin.write(token.encode('utf-8')+' ')
|
164
166
|
alpino_pro.stdin.write('\n')
|
165
167
|
#print>>sys.stderr
|
data/task/requirements.rake
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-constituent-parser-nl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|