opener-pos-tagger-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +110 -0
- data/bin/pos-tagger-base +21 -0
- data/core/mapping.postag.stss.to.opener.csv +52 -0
- data/core/mapping.postag.wotan.to.opener.csv +13 -0
- data/core/opennlp/bin/opennlp +35 -0
- data/core/opennlp/bin/opennlp.bat +35 -0
- data/core/opennlp/lib/jwnl-1.3.3.jar +0 -0
- data/core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar +0 -0
- data/core/opennlp/models/de-pos-maxent.bin +0 -0
- data/core/opennlp/models/de-pos-perceptron.bin +0 -0
- data/core/opennlp/models/nl-pos-maxent.bin +0 -0
- data/core/opennlp/models/nl-pos-perceptron.bin +0 -0
- data/core/pos-tagger_open-nlp.py +160 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/token_matcher.py +80 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/pos_taggers/base.rb +90 -0
- data/lib/opener/pos_taggers/base/version.rb +7 -0
- data/opener-pos-tagger-base.gemspec +29 -0
- data/pre_build_requirements.txt +1 -0
- metadata +132 -0
Binary file
|
@@ -0,0 +1,7 @@
|
|
1
|
+
## version = 0.2
|
2
|
+
## Added timestamp to function addLinguisitcProcessor
|
3
|
+
## 24-april-2013 --> getSingleEntieies and getSingleProperties reads both entities/props in format
|
4
|
+
## entities -> entity -> span -> target and entities -> entity -> references -> span
|
5
|
+
####
|
6
|
+
|
7
|
+
from KafParserMod import KafParser
|
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
|
4
|
+
#####
|
5
|
+
# 4-Mar-2013 : modified order of rules to check first if there is a merge and then if it is an extra token
|
6
|
+
# because of this case, where it can be both: [ .. . ] [ . . . ]
|
7
|
+
|
8
|
+
|
9
|
+
def add_match(d, id_new, id_ref):
    """Record that the new token ``id_new`` aligns with reference token ``id_ref``.

    ``d`` maps a new-token id to the list of reference-token ids matched to it
    so far. The list is created on first use and ``id_ref`` is appended to it,
    so repeated matches accumulate in the order they were found.
    """
    d.setdefault(id_new, []).append(id_ref)


def token_matcher(l_ref, l_new, super_d):
    """Align two tokenisations of the same text and store the matches.

    Both ``l_ref`` and ``l_new`` are sequences of ``(token_text, token_id)``
    pairs. The sequences are walked in parallel and, for every pair of tokens
    that (partially) correspond, ``add_match`` stores ``id_new -> id_ref`` in
    ``super_d``. The rules are tried in this order for the current pair:

    1. identical text: match, advance both sides;
    2. the reference token starts with the new token (the new side split a
       token): match, keep the unmatched remainder of the reference token and
       advance the new side;
    3. the new token starts with the reference token (the new side merged
       tokens): match, keep the remainder of the new token and advance the
       reference side;
    4. the *next* new token equals the reference token: the current new token
       is an extra one, skip it;
    5. the *next* reference token equals the new token: the current reference
       token is an extra one, skip it;
    6. otherwise the pair cannot be matched: skip both.

    Split/merge are deliberately checked before the "extra token" rules since
    some inputs satisfy both.

    The walk is iterative, so long inputs do not hit the recursion limit, and
    it stops as soon as either sequence is exhausted (remaining tokens simply
    get no match). The input sequences are never modified.

    Returns ``None``; the result is the content added to ``super_d``.
    """
    ref_pos = 0
    new_pos = 0
    ref_count = len(l_ref)
    new_count = len(l_new)

    # Unmatched tail of the current token after a split/merge; None means
    # "use the token text as given".
    ref_rest = None
    new_rest = None

    while new_pos < new_count and ref_pos < ref_count:
        token_ref, id_ref = l_ref[ref_pos]
        token_new, id_new = l_new[new_pos]
        if ref_rest is not None:
            token_ref = ref_rest
        if new_rest is not None:
            token_new = new_rest

        advance_ref = False
        advance_new = False

        if token_ref == token_new:
            add_match(super_d, id_new, id_ref)
            advance_ref = True
            advance_new = True
        elif token_ref.startswith(token_new):
            # There was a split: the new token covers only the head of the
            # reference token, so keep matching against what is left of it.
            ref_rest = token_ref[len(token_new):]
            add_match(super_d, id_new, id_ref)
            advance_new = True
        elif token_new.startswith(token_ref):
            # There was a merge: the new token spans several reference tokens.
            new_rest = token_new[len(token_ref):]
            add_match(super_d, id_new, id_ref)
            advance_ref = True
        elif new_pos + 1 < new_count and l_new[new_pos + 1][0] == token_ref:
            # There is an extra token in l_new.
            advance_new = True
        elif ref_pos + 1 < ref_count and l_ref[ref_pos + 1][0] == token_new:
            # There is an extra token in l_ref.
            advance_ref = True
        else:
            # Impossible matching: give up on this pair.
            advance_ref = True
            advance_new = True

        if advance_ref:
            ref_pos += 1
            ref_rest = None
        if advance_new:
            new_pos += 1
            new_rest = None
|
60
|
+
|
61
|
+
|
62
|
+
if __name__ == '__main__':
    # Small manual demo: align two tokenisations of the same sentence and
    # print the inputs followed by the resulting id mapping.
    #
    # Longer sample sentence kept for manual experiments:
    #s1 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau (Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s1 = 'Th is is a very simple example'
    l1 = [(t, 'id' + str(n)) for n, t in enumerate(s1.split(' '))]

    #s2 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau ( Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s2 = 'This is a very sim ple example'
    l2 = [(t, 'id' + str(n)) for n, t in enumerate(s2.split(' '))]

    super_d = {}
    token_matcher(l1, l2, super_d)
    # Single-argument print(...) parses on both Python 2 and Python 3; the
    # bare print statement made this module a SyntaxError on Python 3.
    print(l1)
    print(l2)
    print(super_d)
|
data/ext/hack/support.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'opener/build-tools'
|
2
|
+
|
3
|
+
include Opener::BuildTools::Requirements
|
4
|
+
include Opener::BuildTools::Python
|
5
|
+
include Opener::BuildTools::Files
|
6
|
+
|
7
|
+
# Directory where packages will be installed to.
|
8
|
+
PYTHON_SITE_PACKAGES = File.expand_path(
|
9
|
+
'../../../core/site-packages',
|
10
|
+
__FILE__
|
11
|
+
)
|
12
|
+
|
13
|
+
# Directory containing the temporary files.
|
14
|
+
TMP_DIRECTORY = File.expand_path('../../../tmp', __FILE__)
|
15
|
+
|
16
|
+
# Path to the pip requirements file used to install requirements before
|
17
|
+
# packaging the Gem.
|
18
|
+
PRE_BUILD_REQUIREMENTS = File.expand_path(
|
19
|
+
'../../../pre_build_requirements.txt',
|
20
|
+
__FILE__
|
21
|
+
)
|
22
|
+
|
23
|
+
# Path to the pip requirements file used to install requirements upon Gem
|
24
|
+
# installation.
|
25
|
+
PRE_INSTALL_REQUIREMENTS = File.expand_path(
|
26
|
+
'../../../pre_install_requirements.txt',
|
27
|
+
__FILE__
|
28
|
+
)
|
29
|
+
|
30
|
+
##
|
31
|
+
# Verifies the requirements to install this Gem.
|
32
|
+
#
|
33
|
+
def verify_requirements
|
34
|
+
require_executable('python')
|
35
|
+
require_version('python', python_version, '2.6.0')
|
36
|
+
require_executable('pip')
|
37
|
+
require_version('pip', pip_version, '1.3.1')
|
38
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require_relative 'base/version'
|
3
|
+
|
4
|
+
module Opener
  module POSTaggers
    ##
    # The base POS tagger that supports Dutch and German.
    #
    # @!attribute [r] args
    #  @return [Array]
    # @!attribute [r] options
    #  @return [Hash]
    #
    class Base
      attr_reader :args, :options

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments to pass to the
      #  underlying Python script.
      #
      def initialize(options = {})
        # Work on a copy so that extracting :args does not modify the Hash
        # owned by the caller.
        options  = options.dup
        @args    = options.delete(:args) || []
        @options = options
      end

      ##
      # Builds the command used to execute the kernel, as a single
      # human-readable String. The process itself is started from
      # {#command_arguments}, which carries the same pieces as separate
      # arguments.
      #
      # @return [String]
      #
      def command
        return "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
      end

      ##
      # Runs the command and returns the output of STDOUT, STDERR and the
      # process information.
      #
      # @param [String] input The input to tag.
      # @return [Array]
      #
      def run(input)
        return capture(input)
      end

      protected

      ##
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')
        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # The command as an argument list suitable for Open3.popen3.
      #
      # Every element is handed to the process as-is, so paths or arguments
      # containing spaces are not broken apart. No shell is involved when
      # popen3 receives a list, hence the current PYTHONPATH is read from ENV
      # here instead of relying on "$PYTHONPATH" being expanded.
      #
      # NOTE(review): python's -E flag is documented to ignore PYTHON*
      # environment variables, so the PYTHONPATH set here may have no effect -
      # confirm whether -E is intended.
      #
      # @return [Array<String>]
      #
      def command_arguments
        site_packages = File.join(core_dir, 'site-packages')
        search_path   = [site_packages, ENV['PYTHONPATH']].reject { |entry|
          entry.nil? || entry.empty?
        }

        [
          'env',
          "PYTHONPATH=#{search_path.join(':')}",
          'python',
          '-E',
          '-OO',
          kernel
        ] + args.map(&:to_s)
      end

      ##
      # capture3 method doesn't work properly with JRuby, so
      # this is a workaround.
      #
      # STDOUT and STDERR are drained in separate threads while the input is
      # written, so neither pipe can fill up and block the child process.
      #
      # @param [String] input
      # @return [Array] STDOUT, STDERR and the process status.
      #
      def capture(input)
        Open3.popen3(*command_arguments) { |stdin, stdout, stderr, thread|
          out_reader = Thread.new { stdout.read }
          err_reader = Thread.new { stderr.read }
          stdin.write input
          stdin.close
          [out_reader.value, err_reader.value, thread.value]
        }
      end

      ##
      # @return [String]
      #
      def core_dir
        return File.expand_path('../../../../core', __FILE__)
      end

      ##
      # @return [String]
      #
      def kernel
        return File.join(core_dir, 'pos-tagger_open-nlp.py')
      end
    end # Base

    class DE < Base
    end # DE

  end # POSTaggers
end # Opener
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.expand_path('../lib/opener/pos_taggers/base/version', __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'opener-pos-tagger-base'
|
5
|
+
gem.version = Opener::POSTaggers::Base::VERSION
|
6
|
+
gem.authors = ['development@olery.com']
|
7
|
+
gem.summary = 'POS tagger for Dutch and German using OpenNLP'
|
8
|
+
gem.description = gem.summary
|
9
|
+
gem.homepage = 'http://opener-project.github.com/'
|
10
|
+
|
11
|
+
gem.required_ruby_version = '>= 1.9.2'
|
12
|
+
|
13
|
+
gem.files = Dir.glob([
|
14
|
+
'core/**/*',
|
15
|
+
'ext/**/*',
|
16
|
+
'lib/**/*',
|
17
|
+
'*.gemspec',
|
18
|
+
'*_requirements.txt',
|
19
|
+
'README.md'
|
20
|
+
]).select { |file| File.file?(file) }
|
21
|
+
|
22
|
+
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
23
|
+
|
24
|
+
gem.add_dependency 'opener-build-tools', ['>= 0.2.7']
|
25
|
+
gem.add_dependency 'rake'
|
26
|
+
|
27
|
+
gem.add_development_dependency 'rspec'
|
28
|
+
gem.add_development_dependency 'cucumber'
|
29
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
git+ssh://git@github.com/opener-project/VU-kaf-parser.git#egg=VUKafParserPy
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-pos-tagger-base
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-build-tools
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.7
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: cucumber
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: POS tagger for Dutch and German using OpenNLP
|
70
|
+
email:
|
71
|
+
executables:
|
72
|
+
- pos-tagger-base
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- README.md
|
77
|
+
- bin/pos-tagger-base
|
78
|
+
- core/mapping.postag.stss.to.opener.csv
|
79
|
+
- core/mapping.postag.wotan.to.opener.csv
|
80
|
+
- core/opennlp/bin/opennlp
|
81
|
+
- core/opennlp/bin/opennlp.bat
|
82
|
+
- core/opennlp/lib/jwnl-1.3.3.jar
|
83
|
+
- core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar
|
84
|
+
- core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar
|
85
|
+
- core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar
|
86
|
+
- core/opennlp/models/de-pos-maxent.bin
|
87
|
+
- core/opennlp/models/de-pos-perceptron.bin
|
88
|
+
- core/opennlp/models/nl-pos-maxent.bin
|
89
|
+
- core/opennlp/models/nl-pos-perceptron.bin
|
90
|
+
- core/pos-tagger_open-nlp.py
|
91
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
|
92
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
|
93
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
|
94
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
|
95
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
|
96
|
+
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
|
97
|
+
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc
|
98
|
+
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
|
99
|
+
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc
|
100
|
+
- core/site-packages/pre_build/VUKafParserPy/__init__.py
|
101
|
+
- core/site-packages/pre_build/VUKafParserPy/__init__.pyc
|
102
|
+
- core/token_matcher.py
|
103
|
+
- ext/hack/support.rb
|
104
|
+
- lib/opener/pos_taggers/base.rb
|
105
|
+
- lib/opener/pos_taggers/base/version.rb
|
106
|
+
- opener-pos-tagger-base.gemspec
|
107
|
+
- pre_build_requirements.txt
|
108
|
+
homepage: http://opener-project.github.com/
|
109
|
+
licenses: []
|
110
|
+
metadata: {}
|
111
|
+
post_install_message:
|
112
|
+
rdoc_options: []
|
113
|
+
require_paths:
|
114
|
+
- lib
|
115
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.9.2
|
120
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
requirements: []
|
126
|
+
rubyforge_project:
|
127
|
+
rubygems_version: 2.2.2
|
128
|
+
signing_key:
|
129
|
+
specification_version: 4
|
130
|
+
summary: POS tagger for Dutch and German using OpenNLP
|
131
|
+
test_files: []
|
132
|
+
has_rdoc:
|